meerschaum.connectors

Create connectors with meerschaum.connectors.get_connector(). For ease of use, you can also import from the root meerschaum module:

>>> from meerschaum import get_connector
>>> conn = get_connector()
  1#! /usr/bin/env python
  2# -*- coding: utf-8 -*-
  3# vim:fenc=utf-8
  4
  5"""
  6Create connectors with `meerschaum.connectors.get_connector()`.
  7For ease of use, you can also import from the root `meerschaum` module:
  8```
  9>>> from meerschaum import get_connector
 10>>> conn = get_connector()
 11```
 12"""
 13
 14from __future__ import annotations
 15
 16import meerschaum as mrsm
 17from meerschaum.utils.typing import Any, Union, List, Dict, Optional
 18from meerschaum.utils.threading import RLock
 19from meerschaum.utils.warnings import warn
 20
 21from meerschaum.connectors._Connector import Connector, InvalidAttributesError
 22from meerschaum.connectors.instance._InstanceConnector import InstanceConnector
 23from meerschaum.connectors.sql._SQLConnector import SQLConnector
 24from meerschaum.connectors.api._APIConnector import APIConnector
 25
# Public names exported by `meerschaum.connectors`.
__all__ = (
    "make_connector",
    "Connector",
    "InstanceConnector",
    "SQLConnector",
    "APIConnector",
    "get_connector",
    "is_connected",
    "poll",
    "api",
    "sql",
    "valkey",
    "parse",
)

### Cache of constructed connectors, partitioned by type and then label
### (i.e. `connectors[type][label]`) so `get_connector()` can reuse them.
### `make_connector()` adds an empty sub-dictionary for each newly registered type.
connectors: Dict[str, Dict[str, Connector]] = {
    'api'    : {},
    'sql'    : {},
    'plugin' : {},
    'valkey' : {},
}
### Connector types which `is_connected()` accepts.
### `make_connector()` appends a type here when its class sets `IS_INSTANCE`.
instance_types: List[str] = ['sql', 'api']
### One re-entrant lock per piece of shared module state,
### acquired with `with _locks[<name>]:` before mutating that state.
_locks: Dict[str, RLock] = {
    'connectors'               : RLock(),
    'types'                    : RLock(),
    'custom_types'             : RLock(),
    'plugins_types'            : RLock(),
    '_loaded_plugin_connectors': RLock(),
    'instance_types'           : RLock(),
}

### Fill this with objects only when connectors are first referenced.
### Maps a connector type to its class: the built-in classes are added
### by `get_connector()`, custom classes by `make_connector()`.
types: Dict[str, Any] = {}
### Types registered through `make_connector()`.
custom_types: set = set()
### Maps a plugin's name to the types its connector classes registered.
plugins_types: Dict[str, List[str]] = {}
### Built-in custom types (added by `_load_builtin_custom_connectors()`);
### `unload_plugin_connectors()` resets `custom_types` to this set.
_known_custom_types: set = set()
### Set to `True` once `get_connector()` has loaded the plugin connectors;
### reset to `False` by `unload_plugin_connectors()`.
_loaded_plugin_connectors: bool = False
 65
 66
def get_connector(
    type: Optional[str] = None,
    label: Optional[str] = None,
    refresh: bool = False,
    debug: bool = False,
    _load_plugins: bool = True,
    **kw: Any
) -> Optional[Connector]:
    """
    Return existing connector or create new connection and store for reuse.
    
    You can create new connectors if enough parameters are provided for the given type and flavor.

    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        If `label` is omitted and `type` contains a colon (e.g. `'sql:main'`),
        it is split on the first colon into the type and the label.
        If both `type` and `label` are `None`, the connector for the configured
        instance keys (`meerschaum:instance`) is returned.

    label: Optional[str], default None
        Connector label (e.g. main).
        Defaults to `STATIC_CONFIG['connectors']['default_label']` when only `type` is given.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    debug: bool, default False
        Verbosity toggle. Currently unused by this function.

    _load_plugins: bool, default True
        If `True`, load the plugins' connectors and the built-in custom connectors
        the first time this function is called.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).
    Returns `None` (after emitting a warning) if the type is not registered
    or if the connector could not be constructed.
    
    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum._internal.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    ### Accept combined keys (e.g. 'sql:main') passed as the type.
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    ### Register plugin and built-in custom connector types once per process
    ### (the flag is reset by `unload_plugin_connectors()`).
    if _load_plugins:
        with _locks['_loaded_plugin_connectors']:
            if not _loaded_plugin_connectors:
                load_plugin_connectors()
                _load_builtin_custom_connectors()
                _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### NOTE(review): `parse_instance_keys` presumably calls back into
        ### `get_connector` with the parsed keys -- confirm in `connectors.parse`.
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to the default label if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        ### Look for configured connectors whose label equals the given "type".
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        ### Build " Did you mean 'a', 'b' or 'c'?" from the candidates.
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    ### Lazily register the built-in connector classes on first use.
    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    ### Unknown attribute: only warn if the class constructor
                    ### does not name it as a parameter either.
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector " +
                            f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n" +
                        f"  - Keyword value: '{value}'\n" +
                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            ### NOTE: only the message for the last offending attribute is kept.
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else: ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack = False,
                )
                conn = None
            except Exception as e:
                ### Print the traceback if a console is available, then warn.
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack = False,
                )
                conn = None
        ### Construction failed: a previously cached connector (if any) is not returned.
        if conn is None:
            return None

    return connectors[type][label]
234
235
def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.
    
    **NOTE:** Only works with connector types listed in `instance_types`
    (by default `sql` and `api`).
    Keyword arguments are passed to the connector's `test_connection()` method.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).
        Anything which is not a string of the form `'<type>:<label>'`
        triggers a warning and returns `False`.
        
    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings

    ### Reject malformed keys up front. Non-string values (e.g. `None`) used to
    ### raise a `TypeError` from the `in` check instead of returning `False`.
    if not isinstance(keys, str) or ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")
        return False

    try:
        ### Keys with more than one colon fail to unpack and are treated as invalid.
        typ, label = keys.split(':')
    except ValueError:
        return False
    if typ not in instance_types:
        return False
    ### Never construct a connector here: only test ones which already exist.
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        ### Silence warnings emitted while probing the connection.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False
276
277
def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`
    (or without the suffix `executor` when `_is_executor` is `True`).

    If the class sets `IS_INSTANCE = True`, its type is also registered as an
    instance connector type.
    This requires implementing the various pipes functions and lots of testing.

    Parameters
    ----------
    cls:
        The class to register. It is returned unchanged,
        so this function may be used as a decorator.

    _is_executor: bool, default False
        If `True`, strip the suffix `executor` instead of `connector`
        when deriving the type from the class name.

    Returns
    -------
    The class which was passed in.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>> 
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ... 
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>> 
    """
    import re
    from meerschaum.plugins import _get_parent_plugin
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    ### NOTE(review): the argument is presumably a call-stack depth, so this call
    ### must stay directly inside `make_connector` -- confirm in `meerschaum.plugins`.
    plugin_name = _get_parent_plugin(2)
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    if plugin_name:
        with _locks['plugins_types']:
            registered_types = plugins_types.setdefault(plugin_name, [])
            ### Registering the same class again (e.g. when a plugin is reloaded)
            ### must not leave duplicate entries behind.
            if typ not in registered_types:
                registered_types.append(typ)
    with _locks['connectors']:
        ### Keep any connectors which were already cached for this type.
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls
330
331
def load_plugin_connectors():
    """
    Import every plugin whose source mentions connectors.

    A plugin qualifies when any of its Python files contains the text
    `make_connector` or `Connector`. Importing the plugin's module is what
    runs its `make_connector` decorators.
    """
    import os
    from meerschaum.plugins import get_plugins, import_plugins

    def _source_files(plugin_file):
        ### A single-file plugin is scanned directly;
        ### a package plugin contributes every `.py` file beneath its directory.
        if os.path.basename(plugin_file) != '__init__.py':
            return [plugin_file]
        package_dir = os.path.dirname(plugin_file)
        return [
            os.path.join(dirpath, filename)
            for dirpath, _, filenames in os.walk(package_dir)
            for filename in filenames
            if filename.endswith('.py')
        ]

    def _mentions_connectors(path):
        ### Unreadable or undecodable files are skipped rather than treated as errors.
        try:
            with open(path, encoding='utf-8') as source_file:
                source = source_file.read()
        except (OSError, UnicodeDecodeError):
            return False
        return 'make_connector' in source or 'Connector' in source

    plugins_to_import = []
    for plugin in get_plugins():
        if plugin is None:
            continue

        plugin_file = plugin.__file__
        if not plugin_file:
            continue

        ### `any()` stops at the first matching file.
        if any(_mentions_connectors(path) for path in _source_files(plugin_file)):
            plugins_to_import.append(plugin.name)

    if plugins_to_import:
        import_plugins(*plugins_to_import)
376
377
def unload_plugin_connectors(
    plugin_names: Optional[List[str]] = None,
    debug: bool = False,
) -> None:
    """
    Unload custom connectors added by plugins.

    Parameters
    ----------
    plugin_names: Optional[List[str]], default None
        The plugins whose connector types should be removed.
        If omitted (or empty), use the names returned by `get_plugins_names()`.

    debug: bool, default False
        Verbosity toggle. Currently unused by this function.
    """
    from meerschaum.plugins import get_plugins_names
    global _loaded_plugin_connectors

    if not plugin_names:
        plugin_names = get_plugins_names()

    ### Drop each plugin's types from the class registry, the connector cache,
    ### and the list of instance types.
    for name in plugin_names:
        for registered_type in plugins_types.get(name, []):
            types.pop(registered_type, None)
            connectors.pop(registered_type, None)
            if registered_type in instance_types:
                instance_types.remove(registered_type)

    ### Only the built-in custom types remain registered as custom.
    custom_types.clear()
    custom_types.update(_known_custom_types)

    ### Let the next `get_connector()` call load the plugins' connectors again.
    _loaded_plugin_connectors = False
401
402
def get_connector_plugin(
    connector: Connector,
) -> Optional[mrsm.Plugin]:
    """
    Determine the plugin for a connector.
    This is useful for handling virtual environments for custom instance connectors.

    Parameters
    ----------
    connector: Connector
        The connector which may require a virtual environment.

    Returns
    -------
    The `Plugin` for the connector if that plugin is installed, otherwise `None`.
    The plugin's name is chosen as follows:

    - custom types: the first component of the connector's module path
      (after a leading `plugins.` prefix is removed),
    - `plugin` connectors: the connector's label,
    - everything else: `'mrsm'`.

    `None` is also returned for objects without a `type` attribute.
    """
    if not hasattr(connector, 'type'):
        return None

    if connector.type in custom_types:
        module_name = connector.__module__
        ### Strip only the leading package prefix. Replacing every occurrence of
        ### 'plugins.' would mangle names such as 'plugins.myplugins.foo'.
        prefix = 'plugins.'
        if module_name.startswith(prefix):
            module_name = module_name[len(prefix):]
        plugin_name = module_name.split('.')[0]
    elif connector.type == 'plugin':
        plugin_name = connector.label
    else:
        plugin_name = 'mrsm'

    plugin = mrsm.Plugin(plugin_name)
    return plugin if plugin.is_installed() else None
431
432
def _load_builtin_custom_connectors():
    """
    Import the built-in modules which define custom connectors
    (decorated with `@make_connector` or `@make_executor`)
    and record their types as known custom types.
    """
    import meerschaum.jobs.systemd
    import meerschaum.connectors.valkey
    for builtin_type in ('valkey', 'systemd'):
        _known_custom_types.add(builtin_type)
def make_connector(cls, _is_executor: bool = False):
def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`
    (or without the suffix `executor` when `_is_executor` is `True`).

    If the class sets `IS_INSTANCE = True`, its type is also registered as an
    instance connector type.
    This requires implementing the various pipes functions and lots of testing.

    Parameters
    ----------
    cls:
        The class to register. It is returned unchanged,
        so this function may be used as a decorator.

    _is_executor: bool, default False
        If `True`, strip the suffix `executor` instead of `connector`
        when deriving the type from the class name.

    Returns
    -------
    The class which was passed in.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>> 
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ... 
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>> 
    """
    import re
    from meerschaum.plugins import _get_parent_plugin
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    ### NOTE(review): the argument is presumably a call-stack depth, so this call
    ### must stay directly inside `make_connector` -- confirm in `meerschaum.plugins`.
    plugin_name = _get_parent_plugin(2)
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    if plugin_name:
        with _locks['plugins_types']:
            registered_types = plugins_types.setdefault(plugin_name, [])
            ### Registering the same class again (e.g. when a plugin is reloaded)
            ### must not leave duplicate entries behind.
            if typ not in registered_types:
                registered_types.append(typ)
    with _locks['connectors']:
        ### Keep any connectors which were already cached for this type.
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls

Register a class as a Connector. The type will be the lower case of the class name, without the suffix connector.

Parameters
  • cls (class): The connector class to register. If the class sets IS_INSTANCE to True, its type is also registered as an instance connector type. This requires implementing the various pipes functions and lots of testing.
Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>> 
>>> @make_connector
... class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
... 
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
>>>
class Connector:
 22class Connector(metaclass=abc.ABCMeta):
 23    """
 24    The base connector class to hold connection attributes.
 25    """
 26
 27    IS_INSTANCE: bool = False
 28
 29    def __init__(
 30        self,
 31        type: Optional[str] = None,
 32        label: Optional[str] = None,
 33        **kw: Any
 34    ):
 35        """
 36        Set the given keyword arguments as attributes.
 37
 38        Parameters
 39        ----------
 40        type: str
 41            The `type` of the connector (e.g. `sql`, `api`, `plugin`).
 42
 43        label: str
 44            The `label` for the connector.
 45
 46
 47        Examples
 48        --------
 49        Run `mrsm edit config` and to edit connectors in the YAML file:
 50
 51        ```yaml
 52        meerschaum:
 53            connections:
 54                {type}:
 55                    {label}:
 56                        ### attributes go here
 57        ```
 58
 59        """
 60        self._original_dict = copy.deepcopy(self.__dict__)
 61        self._set_attributes(type=type, label=label, **kw)
 62
 63        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
 64        self.verify_attributes(
 65            ['uri']
 66            if 'uri' in self.__dict__
 67            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
 68        )
 69
 70    def _reset_attributes(self):
 71        self.__dict__ = self._original_dict
 72
 73    def _set_attributes(
 74        self,
 75        *args,
 76        inherit_default: bool = True,
 77        **kw: Any
 78    ):
 79        from meerschaum._internal.static import STATIC_CONFIG
 80        from meerschaum.utils.warnings import error
 81
 82        self._attributes = {}
 83
 84        default_label = STATIC_CONFIG['connectors']['default_label']
 85
 86        ### NOTE: Support the legacy method of explicitly passing the type.
 87        label = kw.get('label', None)
 88        if label is None:
 89            if len(args) == 2:
 90                label = args[1]
 91            elif len(args) == 0:
 92                label = None
 93            else:
 94                label = args[0]
 95
 96        if label == 'default':
 97            error(
 98                f"Label cannot be 'default'. Did you mean '{default_label}'?",
 99                InvalidAttributesError,
100            )
101        self.__dict__['label'] = label
102
103        from meerschaum.config import get_config
104        conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors'))
105        connector_config = copy.deepcopy(get_config('system', 'connectors'))
106
107        ### inherit attributes from 'default' if exists
108        if inherit_default:
109            inherit_from = 'default'
110            if self.type in conn_configs and inherit_from in conn_configs[self.type]:
111                _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from])
112                self._attributes.update(_inherit_dict)
113
114        ### load user config into self._attributes
115        if self.type in conn_configs and self.label in conn_configs[self.type]:
116            self._attributes.update(conn_configs[self.type][self.label] or {})
117
118        ### load system config into self._sys_config
119        ### (deep copy so future Connectors don't inherit changes)
120        if self.type in connector_config:
121            self._sys_config = copy.deepcopy(connector_config[self.type])
122
123        ### add additional arguments or override configuration
124        self._attributes.update(kw)
125
126        ### finally, update __dict__ with _attributes.
127        self.__dict__.update(self._attributes)
128
129    def verify_attributes(
130        self,
131        required_attributes: Optional[List[str]] = None,
132        debug: bool = False,
133    ) -> None:
134        """
135        Ensure that the required attributes have been met.
136        
137        The Connector base class checks the minimum requirements.
138        Child classes may enforce additional requirements.
139
140        Parameters
141        ----------
142        required_attributes: Optional[List[str]], default None
143            Attributes to be verified. If `None`, default to `['label']`.
144
145        debug: bool, default False
146            Verbosity toggle.
147
148        Returns
149        -------
150        Don't return anything.
151
152        Raises
153        ------
154        An error if any of the required attributes are missing.
155        """
156        from meerschaum.utils.warnings import error
157        from meerschaum.utils.misc import items_str
158        if required_attributes is None:
159            required_attributes = ['type', 'label']
160
161        missing_attributes = set()
162        for a in required_attributes:
163            if a not in self.__dict__:
164                missing_attributes.add(a)
165        if len(missing_attributes) > 0:
166            error(
167                (
168                    f"Missing {items_str(list(missing_attributes))} "
169                    + f"for connector '{self.type}:{self.label}'."
170                ),
171                InvalidAttributesError,
172                silent=True,
173                stack=False
174            )
175
176
177    def __str__(self):
178        """
179        When cast to a string, return type:label.
180        """
181        return f"{self.type}:{self.label}"
182
183    def __repr__(self):
184        """
185        Represent the connector as type:label.
186        """
187        return str(self)
188
189    @property
190    def meta(self) -> Dict[str, Any]:
191        """
192        Return the keys needed to reconstruct this Connector.
193        """
194        _meta = {
195            key: value
196            for key, value in self.__dict__.items()
197            if not str(key).startswith('_')
198        }
199        _meta.update({
200            'type': self.type,
201            'label': self.label,
202        })
203        return _meta
204
205
206    @property
207    def type(self) -> str:
208        """
209        Return the type for this connector.
210        """
211        _type = self.__dict__.get('type', None)
212        if _type is None:
213            import re
214            is_executor = self.__class__.__name__.lower().endswith('executor')
215            suffix_regex = (
216                r'connector$'
217                if not is_executor
218                else r'executor$'
219            )
220            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
221            if not _type or _type.lower() == 'instance':
222                raise ValueError("No type could be determined for this connector.")
223            self.__dict__['type'] = _type
224        return _type
225
226
227    @property
228    def label(self) -> str:
229        """
230        Return the label for this connector.
231        """
232        _label = self.__dict__.get('label', None)
233        if _label is None:
234            from meerschaum._internal.static import STATIC_CONFIG
235            _label = STATIC_CONFIG['connectors']['default_label']
236            self.__dict__['label'] = _label
237        return _label

The base connector class to hold connection attributes.

Connector(type: Optional[str] = None, label: Optional[str] = None, **kw: Any)
29    def __init__(
30        self,
31        type: Optional[str] = None,
32        label: Optional[str] = None,
33        **kw: Any
34    ):
35        """
36        Set the given keyword arguments as attributes.
37
38        Parameters
39        ----------
40        type: str
41            The `type` of the connector (e.g. `sql`, `api`, `plugin`).
42
43        label: str
44            The `label` for the connector.
45
46
47        Examples
48        --------
49        Run `mrsm edit config` and to edit connectors in the YAML file:
50
51        ```yaml
52        meerschaum:
53            connections:
54                {type}:
55                    {label}:
56                        ### attributes go here
57        ```
58
59        """
60        self._original_dict = copy.deepcopy(self.__dict__)
61        self._set_attributes(type=type, label=label, **kw)
62
63        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
64        self.verify_attributes(
65            ['uri']
66            if 'uri' in self.__dict__
67            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
68        )

Set the given keyword arguments as attributes.

Parameters
  • type (str): The type of the connector (e.g. sql, api, plugin).
  • label (str): The label for the connector.
Examples

Run mrsm edit config to edit connectors in the YAML file:

meerschaum:
    connectors:
        {type}:
            {label}:
                ### attributes go here
IS_INSTANCE: bool = False
def verify_attributes( self, required_attributes: Optional[List[str]] = None, debug: bool = False) -> None:
129    def verify_attributes(
130        self,
131        required_attributes: Optional[List[str]] = None,
132        debug: bool = False,
133    ) -> None:
134        """
135        Ensure that the required attributes have been met.
136        
137        The Connector base class checks the minimum requirements.
138        Child classes may enforce additional requirements.
139
140        Parameters
141        ----------
142        required_attributes: Optional[List[str]], default None
143            Attributes to be verified. If `None`, default to `['label']`.
144
145        debug: bool, default False
146            Verbosity toggle.
147
148        Returns
149        -------
150        Don't return anything.
151
152        Raises
153        ------
154        An error if any of the required attributes are missing.
155        """
156        from meerschaum.utils.warnings import error
157        from meerschaum.utils.misc import items_str
158        if required_attributes is None:
159            required_attributes = ['type', 'label']
160
161        missing_attributes = set()
162        for a in required_attributes:
163            if a not in self.__dict__:
164                missing_attributes.add(a)
165        if len(missing_attributes) > 0:
166            error(
167                (
168                    f"Missing {items_str(list(missing_attributes))} "
169                    + f"for connector '{self.type}:{self.label}'."
170                ),
171                InvalidAttributesError,
172                silent=True,
173                stack=False
174            )

Ensure that the required attributes have been met.

The Connector base class checks the minimum requirements. Child classes may enforce additional requirements.

Parameters
  • required_attributes (Optional[List[str]], default None): Attributes to be verified. If None, default to ['type', 'label'].
  • debug (bool, default False): Verbosity toggle.
Returns
  • Don't return anything.
Raises
  • An error if any of the required attributes are missing.
meta: Dict[str, Any]
189    @property
190    def meta(self) -> Dict[str, Any]:
191        """
192        Return the keys needed to reconstruct this Connector.
193        """
194        _meta = {
195            key: value
196            for key, value in self.__dict__.items()
197            if not str(key).startswith('_')
198        }
199        _meta.update({
200            'type': self.type,
201            'label': self.label,
202        })
203        return _meta

Return the keys needed to reconstruct this Connector.

type: str
206    @property
207    def type(self) -> str:
208        """
209        Return the type for this connector.
210        """
211        _type = self.__dict__.get('type', None)
212        if _type is None:
213            import re
214            is_executor = self.__class__.__name__.lower().endswith('executor')
215            suffix_regex = (
216                r'connector$'
217                if not is_executor
218                else r'executor$'
219            )
220            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
221            if not _type or _type.lower() == 'instance':
222                raise ValueError("No type could be determined for this connector.")
223            self.__dict__['type'] = _type
224        return _type

Return the type for this connector.

label: str
227    @property
228    def label(self) -> str:
229        """
230        Return the label for this connector.
231        """
232        _label = self.__dict__.get('label', None)
233        if _label is None:
234            from meerschaum._internal.static import STATIC_CONFIG
235            _label = STATIC_CONFIG['connectors']['default_label']
236            self.__dict__['label'] = _label
237        return _label

Return the label for this connector.

class InstanceConnector(meerschaum.connectors.Connector):
class InstanceConnector(Connector):
    """
    Instance connectors define the interface for managing pipes and provide methods
    for management of users, plugins, tokens, and other metadata built atop pipes.
    """

    IS_INSTANCE: bool = True
    IS_THREAD_SAFE: bool = False

    # Functions imported inside the class body become attributes of the class,
    # so each name below is available as a method on instances.
    from ._users import (
        get_users_pipe,
        register_user,
        get_user_id,
        get_username,
        get_users,
        edit_user,
        delete_user,
        get_user_password_hash,
        get_user_type,
        get_user_attributes,
    )

    # `get_plugin_user_id` was previously listed twice; one entry is enough.
    from ._plugins import (
        get_plugins_pipe,
        register_plugin,
        get_plugin_user_id,
        delete_plugin,
        get_plugin_id,
        get_plugin_version,
        get_plugins,
        get_plugin_username,
        get_plugin_attributes,
    )

    from ._tokens import (
        get_tokens_pipe,
        register_token,
        edit_token,
        invalidate_token,
        delete_token,
        get_token,
        get_tokens,
        get_token_model,
        get_token_secret_hash,
        token_exists,
        get_token_scopes,
    )

    from ._pipes import (
        register_pipe,
        get_pipe_attributes,
        get_pipe_id,
        edit_pipe,
        delete_pipe,
        fetch_pipes_keys,
        pipe_exists,
        drop_pipe,
        drop_pipe_indices,
        sync_pipe,
        create_pipe_indices,
        clear_pipe,
        get_pipe_data,
        get_pipe_docs,
        get_sync_time,
        get_pipe_columns_types,
        get_pipe_columns_indices,
        get_pipe_size,
        compress_pipe,
        decompress_pipe,
        vacuum_pipe,
        analyze_pipe,
        partition_pipe,
    )

Instance connectors define the interface for managing pipes and provide methods for management of users, plugins, tokens, and other metadata built atop pipes.

IS_INSTANCE: bool = True
IS_THREAD_SAFE: bool = False
def get_users_pipe(self) -> meerschaum.Pipe:
def get_users_pipe(self) -> 'mrsm.Pipe':
    """
    Return the pipe used for users registration.

    The pipe is built on first use and kept on the connector as `_users_pipe`;
    later calls return that same object.
    """
    if '_users_pipe' not in self.__dict__:
        users_dtypes = {
            'user_id': 'uuid',
            'username': 'string',
            'password_hash': 'string',
            'email': 'string',
            'user_type': 'string',
            'attributes': 'json',
        }
        self._users_pipe = mrsm.Pipe(
            'mrsm', 'users',
            instance=self,
            target='mrsm_users',
            temporary=True,
            cache=True,
            cache_connector_keys=self.__dict__.get('_cache_connector', None),
            static=True,
            null_indices=False,
            columns={'primary': 'user_id'},
            dtypes=users_dtypes,
            indices={'unique': 'username'},
        )

    return self._users_pipe

Return the pipe used for users registration.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def register_user(
    self,
    user: User,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Register a new user to the users pipe.

    A new UUID is assigned to `user.user_id` (replacing any existing value)
    and a single document describing the user is synced without checking
    for existing rows.

    Returns
    -------
    A `SuccessTuple`: `(True, "Success")` or `(False, <failure message>)`.
    """
    users_pipe = self.get_users_pipe()
    user.user_id = uuid.uuid4()
    new_doc = {
        'user_id': user.user_id,
        'username': user.username,
        'email': user.email,
        'password_hash': user.password_hash,
        'user_type': user.type,
        'attributes': user.attributes,
    }
    ok, msg = users_pipe.sync([new_doc], check_existing=False, debug=debug)
    if ok:
        return True, "Success"

    return False, f"Failed to register user '{user.username}':\n{msg}"

Register a new user to the users pipe.

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[uuid.UUID]:
def get_user_id(self, user: User, debug: bool = False) -> Union[uuid.UUID, None]:
    """
    Return a user's ID from the username.

    Parameters
    ----------
    user: User
        The user whose `username` is looked up in the users pipe.

    debug: bool, default False
        Verbosity toggle, forwarded to the pipe's `get_data()`.

    Returns
    -------
    The `user_id` of the first matching row, or `None` if nothing matched.
    """
    users_pipe = self.get_users_pipe()
    result_df = users_pipe.get_data(
        ['user_id'],
        params={'username': user.username},
        limit=1,
        debug=debug,
    )
    if result_df is None or len(result_df) == 0:
        return None
    return result_df['user_id'][0]

Return a user's ID from the username.

def get_username(self, user_id: Any, debug: bool = False) -> Any:
def get_username(self, user_id: Any, debug: bool = False) -> Any:
    """
    Return the username from the given ID.

    The value is read from the `username` column of the users pipe, filtered
    by `user_id`.
    """
    lookup = {'user_id': user_id}
    return self.get_users_pipe().get_value('username', lookup, debug=debug)

Return the username from the given ID.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
def get_users(
    self,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Get the registered usernames.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle, forwarded to the pipe's `get_data()`.

    Returns
    -------
    A list of usernames, or an empty list if no data could be fetched.
    """
    users_pipe = self.get_users_pipe()
    # Only the `username` column is needed; avoid pulling the other columns
    # (such as password hashes) just to discard them.
    df = users_pipe.get_data(['username'], debug=debug)
    if df is None:
        return []

    return list(df['username'])

Get the registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
def edit_user(self, user: User, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Edit the attributes for an existing user.

    The synced document always carries `user_id`; `email`, `password_hash`
    and `user_type` are included only when not the empty string, and
    `attributes` only when truthy.

    Returns
    -------
    A `SuccessTuple`: `(True, "Success")` or `(False, <failure message>)`.
    """
    users_pipe = self.get_users_pipe()
    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)

    doc = {'user_id': user_id}
    string_fields = (
        ('email', user.email),
        ('password_hash', user.password_hash),
        ('user_type', user.type),
    )
    for column, value in string_fields:
        if value != '':
            doc[column] = value
    if user.attributes:
        doc['attributes'] = user.attributes

    ok, msg = users_pipe.sync([doc], debug=debug)
    if ok:
        return True, "Success"

    return False, f"Failed to edit user '{user.username}':\n{msg}"

Edit the attributes for an existing user.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
def delete_user(self, user: User, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete a user from the users table.

    The user's ID is taken from `user.user_id` when set, otherwise looked up
    via `get_user_id()`; rows matching that ID are cleared from the users pipe.

    Returns
    -------
    A `SuccessTuple`: `(True, "Success")` or `(False, <failure message>)`.
    """
    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)

    cleared, msg = self.get_users_pipe().clear(params={'user_id': user_id}, debug=debug)
    if cleared:
        return True, "Success"
    return False, f"Failed to delete user '{user}':\n{msg}"

Delete a user from the users table.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[str]:
def get_user_password_hash(self, user: User, debug: bool = False) -> Union[str, None]:
    """
    Get a user's password hash from the users table.

    Parameters
    ----------
    user: User
        The user to look up. `user.user_id` is used when set; otherwise the ID
        is resolved with `get_user_id()`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The stored password hash (the `password_hash` column is declared as a
    string), or `None` if no row matched.
    """
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    users_pipe = self.get_users_pipe()
    result_df = users_pipe.get_data(['password_hash'], params={'user_id': user_id}, debug=debug)
    if result_df is None or len(result_df) == 0:
        return None

    return result_df['password_hash'][0]

Get a user's password hash from the users table.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[str]:
def get_user_type(self, user: User, debug: bool = False) -> Union[str, None]:
    """
    Get a user's type from the users table.

    Returns the `user_type` of the first row matching the user's ID,
    or `None` when no row is found.
    """
    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)

    rows = self.get_users_pipe().get_data(
        ['user_type'],
        params={'user_id': user_id},
        debug=debug,
    )
    if rows is not None and len(rows) != 0:
        return rows['user_type'][0]

    return None

Get a user's type from the users table.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[Dict[str, Any]]:
def get_user_attributes(self, user: User, debug: bool = False) -> Union[Dict[str, Any], None]:
    """
    Get a user's attributes from the users table.

    Returns the `attributes` value of the first row matching the user's ID,
    or `None` when no row is found.
    """
    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)

    rows = self.get_users_pipe().get_data(
        ['attributes'],
        params={'user_id': user_id},
        debug=debug,
    )
    if rows is not None and len(rows) != 0:
        return rows['attributes'][0]

    return None

Get a user's attributes from the users table.

def get_plugins_pipe(self) -> meerschaum.Pipe:
def get_plugins_pipe(self) -> 'mrsm.Pipe':
    """
    Return the internal pipe for syncing plugins metadata.

    The pipe is built on first use and kept on the connector as
    `_plugins_pipe`. Its `user_id` dtype is taken from the users pipe's
    dtypes, defaulting to `'uuid'`.
    """
    if '_plugins_pipe' not in self.__dict__:
        cache_connector = self.__dict__.get('_cache_connector', None)
        user_id_dtype = self.get_users_pipe().dtypes.get('user_id', 'uuid')
        plugins_columns = {
            'primary': 'plugin_name',
            'user_id': 'user_id',
        }
        plugins_dtypes = {
            'plugin_name': 'string',
            'user_id': user_id_dtype,
            'attributes': 'json',
            'version': 'string',
        }
        self._plugins_pipe = mrsm.Pipe(
            'mrsm', 'plugins',
            instance=self,
            target='mrsm_plugins',
            temporary=True,
            cache=True,
            cache_connector_keys=cache_connector,
            static=True,
            null_indices=False,
            columns=plugins_columns,
            dtypes=plugins_dtypes,
        )

    return self._plugins_pipe

Return the internal pipe for syncing plugins metadata.

def register_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
def register_plugin(self, plugin: Plugin, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Register a new plugin to the plugins table.

    Parameters
    ----------
    plugin: Plugin
        The plugin to register. Its `name`, `version`, `attributes` and
        `user_id` are written to the plugins pipe.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple`. Registration is refused when the plugin already has an
    owner in the plugins pipe.
    """
    plugins_pipe = self.get_plugins_pipe()
    user_id = self.get_plugin_user_id(plugin, debug=debug)
    if user_id is not None:
        username = self.get_username(user_id, debug=debug)
        return False, f"{plugin} is already registered to '{username}'."

    doc = {
        'plugin_name': plugin.name,
        'version': plugin.version,
        'attributes': plugin.attributes,
        'user_id': plugin.user_id,
    }

    sync_success, sync_msg = plugins_pipe.sync(
        [doc],
        check_existing=False,
        debug=debug,
    )
    if not sync_success:
        return False, f"Failed to register {plugin}:\n{sync_msg}"

    return True, "Success"

Register a new plugin to the plugins table.

def get_plugin_user_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[uuid.UUID]:
def get_plugin_user_id(self, plugin: Plugin, debug: bool = False) -> Union[uuid.UUID, None]:
    """
    Return the user ID for plugin's owner.

    The value is read from the `user_id` column of the plugins pipe,
    filtered by the plugin's name.
    """
    lookup = {'plugin_name': plugin.name}
    return self.get_plugins_pipe().get_value('user_id', lookup, debug=debug)

Return the user ID for plugin's owner.

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
def delete_plugin(self, plugin: Plugin, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete a plugin's registration.

    Fails early when `get_plugin_id()` reports no ID for the plugin; otherwise
    rows matching the plugin's name are cleared from the plugins pipe.

    Returns
    -------
    A `SuccessTuple`: `(True, "Success")` or `(False, <failure message>)`.
    """
    if self.get_plugin_id(plugin, debug=debug) is None:
        return False, f"{plugin} is not registered."

    cleared, msg = self.get_plugins_pipe().clear(
        params={'plugin_name': plugin.name},
        debug=debug,
    )
    if cleared:
        return True, "Success"
    return False, f"Failed to delete {plugin}:\n{msg}"

Delete a plugin's registration.

def get_plugin_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
 97def get_plugin_id(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
 98    """
 99    Return a plugin's ID.
100    """
101    user_id = self.get_plugin_user_id(plugin, debug=debug)
102    return plugin.name if user_id is not None else None

Return a plugin's ID.

def get_plugin_version( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
def get_plugin_version(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
    """
    Return the version for a plugin.

    The value is read from the `version` column of the plugins pipe,
    filtered by the plugin's name.
    """
    lookup = {'plugin_name': plugin.name}
    return self.get_plugins_pipe().get_value('version', lookup, debug=debug)

Return the version for a plugin.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False, **kw: Any) -> List[str]:
def get_plugins(
    self,
    user_id: Optional[int] = None,
    search_term: Optional[str] = None,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Return a list of plugin names.

    Parameters
    ----------
    user_id: Optional[int], default None
        When truthy, only plugins with this `user_id` are fetched.

    search_term: Optional[str], default None
        When given, only names starting with this prefix are returned.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The matching plugin names, or an empty list if no data could be fetched.
    """
    plugins_pipe = self.get_plugins_pipe()
    params = {'user_id': user_id} if user_id else {}

    df = plugins_pipe.get_data(['plugin_name'], params=params, debug=debug)
    if df is None:
        return []

    # An empty prefix matches every name.
    prefix = search_term or ''
    names = []
    for doc in df.to_dict(orient='records'):
        plugin_name = doc['plugin_name']
        if plugin_name.startswith(prefix):
            names.append(plugin_name)
    return names

Return a list of plugin names.

def get_plugin_username( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
def get_plugin_username(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
    """
    Return the username for plugin's owner.

    Parameters
    ----------
    plugin: Plugin
        The plugin whose owner should be looked up.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The result of `get_username()` for the plugin's owner, or `None` when the
    plugin has no owner in the plugins pipe.
    """
    user_id = self.get_plugin_user_id(plugin, debug=debug)
    if user_id is None:
        return None
    return self.get_username(user_id, debug=debug)

Return the username for plugin's owner.

def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
def get_plugin_attributes(self, plugin: Plugin, debug: bool = False) -> Dict[str, Any]:
    """
    Return the attributes for a plugin.

    A falsy stored value (including a missing row) yields an empty dictionary.
    """
    lookup = {'plugin_name': plugin.name}
    attributes = self.get_plugins_pipe().get_value('attributes', lookup, debug=debug)
    return attributes or {}

Return the attributes for a plugin.

def get_tokens_pipe(self) -> meerschaum.Pipe:
def get_tokens_pipe(self) -> mrsm.Pipe:
    """
    Return the internal pipe for tokens management.

    The pipe is built on first use and kept on the connector as `_tokens_pipe`.
    Its `user_id` dtype is read from the users pipe's `parameters:dtypes`,
    defaulting to `'uuid'`.
    """
    if '_tokens_pipe' not in self.__dict__:
        users_parameters = self.get_users_pipe()._attributes.get('parameters', {})
        user_id_dtype = users_parameters.get('dtypes', {}).get('user_id', 'uuid')
        cache_connector = self.__dict__.get('_cache_connector', None)

        tokens_dtypes = {
            'id': 'uuid',
            'creation': 'datetime',
            'expiration': 'datetime',
            'is_valid': 'bool',
            'label': 'string',
            'user_id': user_id_dtype,
            'scopes': 'json',
            'secret_hash': 'string',
        }
        self._tokens_pipe = mrsm.Pipe(
            'mrsm', 'tokens',
            instance=self,
            target='mrsm_tokens',
            temporary=True,
            cache=True,
            cache_connector_keys=cache_connector,
            static=True,
            autotime=True,
            null_indices=False,
            columns={'datetime': 'creation', 'primary': 'id'},
            indices={'unique': 'label', 'user_id': 'user_id'},
            dtypes=tokens_dtypes,
        )

    return self._tokens_pipe

Return the internal pipe for tokens management.

def register_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def register_token(
    self,
    token: Token,
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Register the new token to the tokens table.

    Parameters
    ----------
    token: Token
        The token to register. Credentials are generated from it and only a
        hash of the secret is stored.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple`. Registration fails when the token has no user or the
    user's ID cannot be found.
    """
    token_id, token_secret = token.generate_credentials()
    tokens_pipe = self.get_tokens_pipe()
    user_id = self.get_user_id(token.user, debug=debug) if token.user is not None else None
    if user_id is None:
        return False, "Cannot register a token without a user."

    doc = {
        'id': token_id,
        'user_id': user_id,
        'creation': datetime.now(timezone.utc),
        'expiration': token.expiration,
        'label': token.label,
        'is_valid': token.is_valid,
        'scopes': list(token.scopes) if token.scopes else [],
        'secret_hash': hash_password(
            str(token_secret),
            rounds=STATIC_CONFIG['tokens']['hash_rounds']
        ),
    }
    sync_success, sync_msg = tokens_pipe.sync([doc], check_existing=False, debug=debug)
    if not sync_success:
        return False, f"Failed to register token:\n{sync_msg}"
    return True, "Success"

Register the new token to the tokens table.

def edit_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def edit_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Persist the token's in-memory state to the tokens pipe.

    Parameters
    ----------
    token: Token
        The token to persist. If `token.creation` is unset, it is filled in
        from the stored token model before syncing.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple`. Fails when the token has no ID or does not exist.
    """
    if not token.id:
        return False, "Token ID is not set."

    if not token.exists(debug=debug):
        return False, f"Token {token.id} does not exist."

    if not token.creation:
        token_model = self.get_token_model(token.id, debug=debug)
        token.creation = token_model.creation

    tokens_pipe = self.get_tokens_pipe()
    doc = {
        'id': token.id,
        'creation': token.creation,
        'expiration': token.expiration,
        'label': token.label,
        'is_valid': token.is_valid,
        'scopes': list(token.scopes) if token.scopes else [],
    }
    sync_success, sync_msg = tokens_pipe.sync([doc], debug=debug)
    if not sync_success:
        return False, f"Failed to edit token '{token.id}':\n{sync_msg}"

    return True, "Success"

Persist the token's in-memory state to the tokens pipe.

def invalidate_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def invalidate_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Set `is_valid` to `False` for the given token.

    Parameters
    ----------
    token: Token
        The token to invalidate. Its in-memory `is_valid` is set to `False`
        before the change is synced. If `token.creation` is unset, it is
        filled in from the stored token model.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple`. Fails when the token has no ID or does not exist.
    """
    if not token.id:
        return False, "Token ID is not set."

    if not token.exists(debug=debug):
        return False, f"Token {token.id} does not exist."

    if not token.creation:
        token_model = self.get_token_model(token.id, debug=debug)
        token.creation = token_model.creation

    token.is_valid = False
    tokens_pipe = self.get_tokens_pipe()
    doc = {
        'id': token.id,
        'creation': token.creation,
        'is_valid': False,
    }
    sync_success, sync_msg = tokens_pipe.sync([doc], debug=debug)
    if not sync_success:
        return False, f"Failed to invalidate token '{token.id}':\n{sync_msg}"

    return True, "Success"

Set is_valid to False for the given token.

def delete_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def delete_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete the given token from the tokens table.

    Parameters
    ----------
    token: Token
        The token to delete. Its in-memory `is_valid` is set to `False` before
        the rows matching its ID are cleared. If `token.creation` is unset, it
        is filled in from the stored token model.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple`. Fails when the token has no ID or does not exist.
    """
    if not token.id:
        return False, "Token ID is not set."

    if not token.exists(debug=debug):
        return False, f"Token {token.id} does not exist."

    if not token.creation:
        token_model = self.get_token_model(token.id, debug=debug)
        token.creation = token_model.creation

    token.is_valid = False
    tokens_pipe = self.get_tokens_pipe()
    clear_success, clear_msg = tokens_pipe.clear(params={'id': token.id}, debug=debug)
    if not clear_success:
        return False, f"Failed to delete token '{token.id}':\n{clear_msg}"

    return True, "Success"

Delete the given token from the tokens table.

def get_token( self, token_id: Union[uuid.UUID, str], debug: bool = False) -> Optional[meerschaum.core.Token._Token.Token]:
def get_token(self, token_id: Union[uuid.UUID, str], debug: bool = False) -> Union[Token, None]:
    """
    Return the `Token` from its ID.

    Parameters
    ----------
    token_id: Union[uuid.UUID, str]
        The token's ID. A string must be a valid UUID and is converted
        to a `uuid.UUID` before the lookup.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_token_model()`.

    Returns
    -------
    A `Token` built from the stored token model, or `None` if no model exists.

    Raises
    ------
    A `ValueError` if `token_id` is a string which is not a valid UUID.
    """
    if isinstance(token_id, str):
        # Only string IDs need validating, so import the helper lazily here.
        from meerschaum.utils.misc import is_uuid
        if not is_uuid(token_id):
            raise ValueError("Invalid token ID.")
        token_id = uuid.UUID(token_id)

    token_model = self.get_token_model(token_id, debug=debug)
    if token_model is None:
        return None
    return Token(**dict(token_model))

Return the Token from its ID.

def get_tokens( self, user: Optional[meerschaum.core.User._User.User] = None, labels: Optional[List[str]] = None, ids: Optional[List[uuid.UUID]] = None, debug: bool = False) -> List[meerschaum.core.Token._Token.Token]:
def get_tokens(
    self,
    user: Optional[User] = None,
    labels: Optional[List[str]] = None,
    ids: Optional[List[uuid.UUID]] = None,
    debug: bool = False,
) -> List[Token]:
    """
    Return a list of `Token` objects.

    Parameters
    ----------
    user: Optional[User], default None
        When given and the user's ID can be resolved, filter on that ID.
        For users of type `admin`, rows with a null `user_id` also match.

    labels: Optional[List[str]], default None
        When non-empty, filter on these token labels.

    ids: Optional[List[uuid.UUID]], default None
        When non-empty, filter on these token IDs.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The matching tokens in the reverse of the order the pipe returned them,
    or an empty list if no data could be fetched.
    """
    tokens_pipe = self.get_tokens_pipe()

    user_id = None
    user_type = None
    if user is not None:
        user_id = self.get_user_id(user, debug=debug)
        user_type = self.get_user_type(user, debug=debug)

    params = {}
    if user_id is not None:
        params['user_id'] = [user_id, None] if user_type == 'admin' else user_id
    if labels:
        params['label'] = labels
    if ids:
        params['id'] = ids

    if debug:
        dprint(f"Getting tokens with {user_id=}, {params=}")

    tokens_df = tokens_pipe.get_data(params=params, debug=debug)
    if tokens_df is None:
        return []

    if debug:
        dprint(f"Retrieved tokens dataframe:\n{tokens_df}")

    tokens = []
    for token_doc in reversed(tokens_df.to_dict(orient='records')):
        tokens.append(Token(instance=self, **token_doc))
    return tokens

Return a list of Token objects.

def get_token_model( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> "'Union[TokenModel, None]'":
def get_token_model(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> 'Union[TokenModel, None]':
    """
    Return a token's model from the instance.

    Parameters
    ----------
    token_id: Union[uuid.UUID, Token]
        The token's ID, or a `Token` whose `id` should be used.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `TokenModel` built from the token's document, or `None` if no
    document was found.

    Raises
    ------
    A `ValueError` if the resolved token ID is falsy.
    """
    from meerschaum.models import TokenModel
    if isinstance(token_id, Token):
        # Use the ID of the token that was passed in, not the class attribute.
        token_id = token_id.id
    if not token_id:
        raise ValueError("Invalid token ID.")
    tokens_pipe = self.get_tokens_pipe()
    doc = tokens_pipe.get_doc(
        params={'id': token_id},
        debug=debug,
    )
    if doc is None:
        return None
    return TokenModel(**doc)

Return a token's model from the instance.

def get_token_secret_hash( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> Optional[str]:
def get_token_secret_hash(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> Union[str, None]:
    """
    Return the secret hash for a given token.

    `token_id` may be the ID itself or a `Token`, in which case its `id` is used.

    Raises
    ------
    A `ValueError` if the resolved token ID is falsy.
    """
    resolved_id = token_id.id if isinstance(token_id, Token) else token_id
    if not resolved_id:
        raise ValueError("Invalid token ID.")

    return self.get_tokens_pipe().get_value(
        'secret_hash',
        params={'id': resolved_id},
        debug=debug,
    )

Return the secret hash for a given token.

def token_exists( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> bool:
def token_exists(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> bool:
    """
    Return `True` if a token exists in the tokens pipe.

    Existence is decided by whether a `creation` value can be read for the ID.
    `token_id` may be the ID itself or a `Token`, in which case its `id` is used.

    Raises
    ------
    A `ValueError` if the resolved token ID is falsy.
    """
    resolved_id = token_id.id if isinstance(token_id, Token) else token_id
    if not resolved_id:
        raise ValueError("Invalid token ID.")

    creation = self.get_tokens_pipe().get_value(
        'creation',
        params={'id': resolved_id},
        debug=debug,
    )
    return creation is not None

Return True if a token exists in the tokens pipe.

def get_token_scopes( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> List[str]:
def get_token_scopes(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> List[str]:
    """
    Return the scopes for a token.

    A falsy stored value (including a missing row) yields an empty list.
    `token_id` may be the ID itself or a `Token`, in which case its `id` is used.

    Raises
    ------
    A `ValueError` if the resolved token ID is falsy.
    """
    resolved_id = token_id.id if isinstance(token_id, Token) else token_id
    if not resolved_id:
        raise ValueError("Invalid token ID.")

    scopes = self.get_tokens_pipe().get_value(
        'scopes',
        params={'id': resolved_id},
        debug=debug,
    )
    return scopes or []

Return the scopes for a token.

@abc.abstractmethod
def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
@abc.abstractmethod
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Insert the pipe's attributes into the internal `pipes` table.

    This is an abstract declaration: it has no body of its own.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be registered.

    debug: bool, default False
        Verbosity toggle.

    **kwargs: Any
        Additional keyword arguments; none are used by this declaration.

    Returns
    -------
    A `SuccessTuple` of the result.
    """

Insert the pipe's attributes into the internal pipes table.

Parameters
  • pipe (mrsm.Pipe): The pipe to be registered.
Returns
  • A SuccessTuple of the result.
@abc.abstractmethod
def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Dict[str, Any]:
@abc.abstractmethod
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Return the pipe's document from the internal `pipes` table.

    This is an abstract declaration: it has no body of its own.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose attributes should be retrieved.

    debug: bool, default False
        Verbosity toggle.

    **kwargs: Any
        Additional keyword arguments; none are used by this declaration.

    Returns
    -------
    The document that matches the keys of the pipe.
    """

Return the pipe's document from the internal pipes table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose attributes should be retrieved.
Returns
  • The document that matches the keys of the pipe.
@abc.abstractmethod
def get_pipe_id( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Union[str, int, NoneType]:
@abc.abstractmethod
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[str, int, None]:
    """
    Return the `id` for the pipe if it exists.

    This is an abstract declaration: it has no body of its own.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose `id` to fetch.

    debug: bool, default False
        Verbosity toggle.

    **kwargs: Any
        Additional keyword arguments; none are used by this declaration.

    Returns
    -------
    The `id` for the pipe's document or `None`.
    """

Return the id for the pipe if it exists.

Parameters
  • pipe (mrsm.Pipe): The pipe whose id to fetch.
Returns
  • The id for the pipe's document or None.
def edit_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def edit_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Edit the attributes of the pipe.

    This base definition is a placeholder which always raises.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose in-memory parameters must be persisted.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.

    Raises
    ------
    `NotImplementedError`, unconditionally.
    """
    raise NotImplementedError()

Edit the attributes of the pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe whose in-memory parameters must be persisted.
Returns
  • A SuccessTuple indicating success.
def delete_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
 97def delete_pipe(
 98    self,
 99    pipe: mrsm.Pipe,
100    debug: bool = False,
101    **kwargs: Any
102) -> mrsm.SuccessTuple:
103    """
104    Delete a pipe's registration from the `pipes` collection.
105
106    Parameters
107    ----------
108    pipe: mrsm.Pipe
109        The pipe to be deleted.
110
111    Returns
112    -------
113    A `SuccessTuple` indicating success.
114    """
115    raise NotImplementedError

Delete a pipe's registration from the pipes collection.

Parameters
  • pipe (mrsm.Pipe): The pipe to be deleted.
Returns
  • A SuccessTuple indicating success.
@abc.abstractmethod
def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, debug: bool = False, **kwargs: Any) -> Union[List[Tuple[str, str, str]], List[Tuple[str, str, str, Union[Dict[str, Any], List[str]]]], Dict[Union[int, str], Tuple[str, str, str]], Dict[Union[int, str], Tuple[str, str, str, Union[Dict[str, Any], List[str]]]]]:
@abc.abstractmethod
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union[
    List[Tuple[str, str, str]],
    List[Tuple[str, str, str, Union[Dict[str, Any], List[str]]]],
    Dict[Union[int, str], Tuple[str, str, str]],
    Dict[Union[int, str], Tuple[str, str, str, Union[Dict[str, Any], List[str]]]],
]:
    """
    Return registered pipes' keys according to the provided filters.

    May return either a list of key tuples or a dictionary mapping pipe IDs to key tuples.
    When returning a dictionary, the key is the pipe's unique ID (int or str).
    Tuples may be length 3 `(connector_keys, metric_key, location_key)` or length 4
    with parameters or tags appended as the fourth element.

    This is an abstract declaration: it has no body of its own.

    Parameters
    ----------
    connector_keys: list[str] | None, default None
        The keys passed via `-c`.

    metric_keys: list[str] | None, default None
        The keys passed via `-m`.

    location_keys: list[str] | None, default None
        The keys passed via `-l`.

    tags: list[str] | None, default None
        Tags passed via `--tags` which are stored under `parameters:tags`.

    debug: bool, default False
        Verbosity toggle.

    **kwargs: Any
        Additional keyword arguments; none are used by this declaration.

    Returns
    -------
    A list of tuples or a dictionary mapping pipe IDs to tuples.
    You may return the string `"None"` for location keys in place of nulls.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> conn = mrsm.get_connector('example:demo')
    >>>
    >>> pipe_a = mrsm.Pipe('a', 'demo', tags=['foo'], instance=conn)
    >>> pipe_b = mrsm.Pipe('b', 'demo', tags=['bar'], instance=conn)
    >>> pipe_a.register()
    >>> pipe_b.register()
    >>>
    >>> conn.fetch_pipes_keys(['a', 'b'])
    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
    >>> conn.fetch_pipes_keys(metric_keys=['demo'])
    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
    >>> conn.fetch_pipes_keys(tags=['foo'])
    [('a', 'demo', 'None')]
    >>> conn.fetch_pipes_keys(location_keys=[None])
    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
    """

Return registered pipes' keys according to the provided filters.

May return either a list of key tuples or a dictionary mapping pipe IDs to key tuples. When returning a dictionary, the key is the pipe's unique ID (int or str). Tuples may be length 3 (connector_keys, metric_key, location_key) or length 4 with parameters or tags appended as the fourth element.

Parameters
  • connector_keys (list[str] | None, default None): The keys passed via -c.
  • metric_keys (list[str] | None, default None): The keys passed via -m.
  • location_keys (list[str] | None, default None): The keys passed via -l.
  • tags (List[str] | None, default None): Tags passed via --tags which are stored under parameters:tags.
Returns
  • A list of tuples or a dictionary mapping pipe IDs to tuples.
  • You may return the string "None" for location keys in place of nulls.
Examples
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('example:demo')
>>>
>>> pipe_a = mrsm.Pipe('a', 'demo', tags=['foo'], instance=conn)
>>> pipe_b = mrsm.Pipe('b', 'demo', tags=['bar'], instance=conn)
>>> pipe_a.register()
>>> pipe_b.register()
>>>
>>> conn.fetch_pipes_keys(['a', 'b'])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
>>> conn.fetch_pipes_keys(metric_keys=['demo'])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
>>> conn.fetch_pipes_keys(tags=['foo'])
[('a', 'demo', 'None')]
>>> conn.fetch_pipes_keys(location_keys=[None])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
@abc.abstractmethod
def pipe_exists( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> bool:
@abc.abstractmethod
def pipe_exists(self, pipe: mrsm.Pipe, debug: bool = False, **kwargs: Any) -> bool:
    """
    Report whether the target table backing a pipe exists.

    This is an abstract method with no default behavior.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table is looked up.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `bool` which is `True` when the pipe's target table exists.
    """

Check whether a pipe's target table exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to check whether its table exists.
Returns
  • A bool indicating the table exists.
@abc.abstractmethod
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
@abc.abstractmethod
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop the collection (target table) belonging to a pipe, if it exists.

    The abstract base body only raises `NotImplementedError`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be dropped.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    raise NotImplementedError()

Drop a pipe's collection if it exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to be dropped.
Returns
  • A SuccessTuple indicating success.
def drop_pipe_indices( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def drop_pipe_indices(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop a pipe's indices.

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices need to be dropped.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Cannot drop indices for instance connectors of type '{self.type}'."
    return False, message

Drop a pipe's indices.

Parameters
  • pipe (mrsm.Pipe): The pipe whose indices need to be dropped.
Returns
  • A SuccessTuple indicating success.
@abc.abstractmethod
def sync_pipe( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'" = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
@abc.abstractmethod
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame' = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    chunksize: Optional[int] = -1,
    check_existing: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Sync data into a pipe through this instance connector.

    This is an abstract method with no default behavior.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The Meerschaum Pipe instance into which to sync the data.

    df: Optional[pd.DataFrame], default None
        An optional DataFrame (or equivalent) to sync into the pipe.

    begin: Union[datetime, int, None], default None
        Optionally specify the earliest datetime to search for data.

    end: Union[datetime, int, None], default None
        Optionally specify the latest datetime to search for data.

    chunksize: Optional[int], default -1
        The number of rows to sync per chunk.
        `-1` defers to the system configuration (default is `900`),
        and `None` syncs all rows in one transaction.

    check_existing: bool, default True
        If `True`, pull and diff with existing data from the pipe.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success (`bool`) and message (`str`).
    """

Sync a pipe using a database connection.

Parameters
  • pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
  • df (Optional[pd.DataFrame]): An optional DataFrame or equivalent to sync into the pipe. Defaults to None.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe. Defaults to True.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
Returns
  • A SuccessTuple of success (bool) and message (str).
def create_pipe_indices( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def create_pipe_indices(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Create a pipe's indices.

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices need to be created.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Cannot create indices for instance connectors of type '{self.type}'."
    return False, message

Create a pipe's indices.

Parameters
  • pipe (mrsm.Pipe): The pipe whose indices need to be created.
Returns
  • A SuccessTuple indicating success.
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Delete the rows of a pipe which fall within `begin`, `end`, and `params`.

    The base body only raises `NotImplementedError`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows to clear.

    begin: datetime | int | None, default None
        If provided, remove rows >= `begin`.

    end: datetime | int | None, default None
        If provided, remove rows < `end`.

    params: dict[str, Any] | None, default None
        If provided, only remove rows which match the `params` filter.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    raise NotImplementedError()

Delete rows within begin, end, and params.

Parameters
  • pipe (mrsm.Pipe): The pipe whose rows to clear.
  • begin (datetime | int | None, default None): If provided, remove rows >= begin.
  • end (datetime | int | None, default None): If provided, remove rows < end.
  • params (dict[str, Any] | None, default None): If provided, only remove rows which match the params filter.
Returns
  • A SuccessTuple indicating success.
def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> "Union['pd.DataFrame', None]":
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union['pd.DataFrame', None]:
    """
    Query a pipe's target table and return the result as a DataFrame.

    This default implementation delegates to `get_pipe_docs()` and wraps
    the returned documents in a `pandas.DataFrame`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe with the target table from which to read.

    select_columns: list[str] | None, default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: list[str] | None, default None
        If provided, remove these columns from the selection.

    begin: datetime | int | None, default None
        The earliest `datetime` value to search from (inclusive).

    end: datetime | int | None, default None
        The latest `datetime` value to search up to (exclusive).

    params: dict[str, Any] | None, default None
        Additional filters to apply to the query.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The target table's data as a DataFrame, or `None` when no documents
    are returned or the DataFrame cannot be built.

    Raises
    ------
    NotImplementedError
        If the connector's class still uses the default `get_pipe_docs()`
        (the two defaults would otherwise call each other forever).
    """
    uses_default_docs = type(self).get_pipe_docs is get_pipe_docs
    if uses_default_docs:
        raise NotImplementedError(
            f"Missing `get_pipe_data()` or `get_pipe_docs()` for {type(self)}."
        )

    records = self.get_pipe_docs(
        pipe=pipe,
        select_columns=select_columns,
        omit_columns=omit_columns,
        begin=begin,
        end=end,
        params=params,
        debug=debug,
        **kwargs
    )
    if not records:
        return None

    pandas = mrsm.attempt_import('pandas')
    try:
        frame = pandas.DataFrame(records)
    except Exception as exc:
        ### Best effort: warn and fall back to `None` rather than crash.
        from meerschaum.utils.warnings import warn
        warn(f"Cannot build DataFrame from pipe docs:\n{exc}")
        return None

    return frame

Query a pipe's target table and return the DataFrame.

Parameters
  • pipe (mrsm.Pipe): The pipe with the target table from which to read.
  • select_columns (list[str] | None, default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (list[str] | None, default None): If provided, remove these columns from the selection.
  • begin (datetime | int | None, default None): The earliest datetime value to search from (inclusive).
  • end (datetime | int | None, default None): The latest datetime value to search up to (exclusive).
  • params (dict[str, Any] | None, default None): Additional filters to apply to the query.
Returns
  • The target table's data as a DataFrame.
def get_pipe_docs( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> list[dict[str, typing.Any]]:
def get_pipe_docs(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> list[dict[str, Any]]:
    """
    Return a pipe's data as a list of documents.

    This default implementation calls `get_pipe_data()` and converts the
    resulting DataFrame with `to_dict(orient='records')`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe with the target table from which to read.

    select_columns: list[str] | None, default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: list[str] | None, default None
        If provided, remove these columns from the selection.

    begin: datetime | int | None, default None
        The earliest `datetime` value to search from (inclusive).

    end: datetime | int | None, default None
        The latest `datetime` value to search up to (exclusive).

    params: dict[str, Any] | None, default None
        Additional filters to apply to the query.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The target table's data as a list of dictionaries
    (an empty list when there is no DataFrame or it is empty).
    """
    frame = self.get_pipe_data(
        pipe=pipe,
        select_columns=select_columns,
        omit_columns=omit_columns,
        begin=begin,
        end=end,
        params=params,
        debug=debug,
        **kwargs
    )
    has_rows = frame is not None and not frame.empty
    if not has_rows:
        return []

    return frame.to_dict(orient='records')

Return a pipe's data as a list of documents. Defaults to get_pipe_data().to_dict(orient='records').

Parameters
  • pipe (mrsm.Pipe): The pipe with the target table from which to read.
  • select_columns (list[str] | None, default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (list[str] | None, default None): If provided, remove these columns from the selection.
  • begin (datetime | int | None, default None): The earliest datetime value to search from (inclusive).
  • end (datetime | int | None, default None): The latest datetime value to search up to (exclusive).
  • params (dict[str, Any] | None, default None): Additional filters to apply to the query.
Returns
  • The target table's data as a list of dictionaries.
@abc.abstractmethod
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, debug: bool = False, **kwargs: Any) -> datetime.datetime | int | None:
@abc.abstractmethod
def get_sync_time(
    self,
    pipe: mrsm.Pipe,
    params: Optional[Dict[str, Any]] = None,
    newest: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> datetime | int | None:
    """
    Return the most recent value found on a pipe's `datetime` axis.

    This is an abstract method with no default behavior.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose collection contains documents.

    params: dict[str, Any] | None, default None
        Filter certain parameters when determining the sync time.

    newest: bool, default True
        If `True`, return the maximum value for the column.
        NOTE(review): presumably `False` requests the minimum value; confirm
        against the concrete implementations.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The largest `datetime` or `int` value of the `datetime` axis.
    """

Return the most recent value for the datetime axis.

Parameters
  • pipe (mrsm.Pipe): The pipe whose collection contains documents.
  • params (dict[str, Any] | None, default None): Filter certain parameters when determining the sync time.
  • newest (bool, default True): If True, return the maximum value for the column.
Returns
  • The largest datetime or int value of the datetime axis.
@abc.abstractmethod
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Dict[str, str]:
@abc.abstractmethod
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, str]:
    """
    Return the data types of the columns in a pipe's target table,
    used for data type enforcement.

    This is an abstract method with no default behavior.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table contains columns and data types.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping columns to data types.
    """

Return the data types for the columns in the target table for data type enforcement.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table contains columns and data types.
Returns
  • A dictionary mapping columns to data types.
def get_pipe_columns_indices(self, debug: bool = False) -> Dict[str, List[Dict[str, str]]]:
def get_pipe_columns_indices(
    self,
    pipe: Optional[mrsm.Pipe] = None,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, List[Dict[str, str]]]:
    """
    Return a dictionary mapping columns to metadata about related indices.

    This base implementation reports no indices (an empty dictionary).
    It accepts `pipe` (and extra keyword arguments) like the other pipe
    methods, so a call such as `get_pipe_columns_indices(pipe, debug=debug)`
    no longer fails with a `TypeError` on the base implementation.

    Parameters
    ----------
    pipe: mrsm.Pipe | None, default None
        The pipe whose target table has related indices.
        Unused by this base implementation.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping column names to lists of dictionaries
    with the keys "type" and "name".

    Examples
    --------
    >>> pipe = mrsm.Pipe('demo', 'shirts', columns={'primary': 'id'}, indices={'size_color': ['color', 'size']})
    >>> pipe.sync([{'color': 'red', 'size': 'M'}])
    >>> pipe.get_columns_indices()
    {'id': [{'name': 'demo_shirts_pkey', 'type': 'PRIMARY KEY'}], 'color': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}], 'size': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}]}
    """
    return {}

Return a dictionary mapping columns to metadata about related indices.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table has related indices.
Returns
  • A dictionary mapping column names to lists of dictionaries with the keys "type" and "name".
Examples
>>> pipe = mrsm.Pipe('demo', 'shirts', columns={'primary': 'id'}, indices={'size_color': ['color', 'size']})
>>> pipe.sync([{'color': 'red', 'size': 'M'}])
>>> pipe.get_columns_indices()
{'id': [{'name': 'demo_shirts_pkey', 'type': 'PRIMARY KEY'}], 'color': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}], 'size': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}]}
def get_pipe_size( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Optional[int]:
def get_pipe_size(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[int, None]:
    """
    Return the on-disk size of a pipe's target table in bytes.

    This base implementation always raises, naming the connector's `type`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table size to measure.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    An `int` of the number of bytes occupied by the target table,
    or `None` if the size cannot be determined.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    connector_type = self.type
    raise NotImplementedError(
        f"`get_pipe_size()` is not implemented for instance connectors of type '{connector_type}'."
    )

Return the on-disk size of a pipe's target table in bytes.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table size to measure.
Returns
  • An int of the number of bytes occupied by the target table, or None if the size cannot be determined.
def compress_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def compress_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Compress a pipe's target table to reduce disk usage.

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to compress.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Compression is not supported for instance connectors of type '{self.type}'."
    return False, message

Compress a pipe's target table to reduce disk usage.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to compress.
Returns
  • A SuccessTuple indicating success.
def decompress_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def decompress_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Decompress a pipe's target table (the inverse of `compress_pipe()`).

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to decompress.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Decompression is not supported for instance connectors of type '{self.type}'."
    return False, message

Decompress a pipe's target table, the inverse of compress_pipe().

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to decompress.
Returns
  • A SuccessTuple indicating success.
def vacuum_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def vacuum_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Reclaim disk space from a pipe's target table.

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to vacuum.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Vacuuming is not supported for instance connectors of type '{self.type}'."
    return False, message

Reclaim disk space from a pipe's target table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to vacuum.
Returns
  • A SuccessTuple indicating success.
def analyze_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def analyze_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Refresh the planner statistics for a pipe's target table.

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to analyze.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Analyzing is not supported for instance connectors of type '{self.type}'."
    return False, message

Refresh the planner statistics for a pipe's target table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to analyze.
Returns
  • A SuccessTuple indicating success.
def partition_pipe( self, pipe: meerschaum.Pipe, chunk_minutes: Optional[int] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def partition_pipe(
    self,
    pipe: mrsm.Pipe,
    chunk_minutes: Optional[int] = None,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Rebuild a pipe's target table to a new partition (chunk) width.

    This base implementation performs no work and always reports failure,
    naming the connector's `type` in the message.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The partitioned pipe whose target table to repartition.

    chunk_minutes: Optional[int], default None
        The new partition width in minutes.
        Defaults to the pipe's `verify.chunk_minutes`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    message = f"Repartitioning is not supported for instance connectors of type '{self.type}'."
    return False, message

Rebuild a pipe's target table to a new partition (chunk) width.

Parameters
  • pipe (mrsm.Pipe): The partitioned pipe whose target table to repartition.
  • chunk_minutes (Optional[int], default None): The new partition width in minutes. Defaults to the pipe's verify.chunk_minutes.
Returns
  • A SuccessTuple indicating success.
class SQLConnector(meerschaum.connectors.InstanceConnector):
 20class SQLConnector(InstanceConnector):
 21    """
 22    Connect to SQL databases via `sqlalchemy`.
 23    
 24    SQLConnectors may be used as Meerschaum instance connectors.
 25    Read more about connectors and instances at
 26    https://meerschaum.io/reference/connectors/
 27
 28    """
 29
 30    from ._create_engine import flavor_configs, create_engine
 31    from ._sql import (
 32        read,
 33        value,
 34        exec,
 35        execute,
 36        to_sql,
 37        exec_queries,
 38        get_connection,
 39        _cleanup_connections,
 40    )
 41    from meerschaum.utils.sql import test_connection
 42    from ._fetch import fetch, get_pipe_metadef
 43    from ._cli import cli, _cli_exit
 44    from ._compress import (
 45        get_pipe_size,
 46        compress_pipe,
 47        decompress_pipe,
 48        apply_compression_policy,
 49        _get_compress_settings,
 50        _is_hypertable,
 51        _get_columnstore_settings_query,
 52        _get_columnstore_policy_query,
 53        _get_columnstore_remove_policy_query,
 54        _get_columnstore_disable_query,
 55    )
 56    from ._maintenance import (
 57        vacuum_pipe,
 58        analyze_pipe,
 59        _run_in_autocommit,
 60        _get_vacuum_queries,
 61        _get_analyze_query,
 62    )
 63    from ._partition import (
 64        _should_partition,
 65        _get_partition_column,
 66        _get_partition_count,
 67        _get_chunk_count_timescaledb,
 68        get_partition_info,
 69        partition_pipe,
 70        _partition_bounds,
 71        _partition_literal,
 72        _partition_name,
 73        _get_partition_ranges_for_df,
 74        _get_initial_partition_bounds,
 75        _create_missing_partitions,
 76        _create_missing_partitions_pg,
 77        _create_missing_partitions_mysql,
 78        _get_mysql_max_partition_bound,
 79        _partition_function_name,
 80        _partition_scheme_name,
 81        _get_partition_boundary_values,
 82        _get_mssql_partition_creation_queries,
 83        _get_mssql_max_partition_boundary,
 84        _create_missing_partitions_mssql,
 85        _get_partition_cleanup_queries,
 86    )
 87    from ._pipes import (
 88        fetch_pipes_keys,
 89        create_indices,
 90        drop_indices,
 91        get_create_index_queries,
 92        get_drop_index_queries,
 93        get_add_columns_queries,
 94        get_alter_columns_queries,
 95        delete_pipe,
 96        get_pipe_data,
 97        get_pipe_docs,
 98        get_pipe_data_query,
 99        register_pipe,
100        edit_pipe,
101        get_pipe_id,
102        get_pipe_attributes,
103        sync_pipe,
104        sync_pipe_inplace,
105        get_sync_time,
106        pipe_exists,
107        get_pipe_rowcount,
108        drop_pipe,
109        clear_pipe,
110        deduplicate_pipe,
111        get_pipe_table,
112        get_pipe_columns_types,
113        get_to_sql_dtype,
114        get_pipe_schema,
115        create_pipe_table_from_df,
116        get_pipe_columns_indices,
117        get_temporary_target,
118        create_pipe_indices,
119        drop_pipe_indices,
120        get_pipe_index_names,
121        _init_geopackage_pipe,
122    )
123    from ._plugins import (
124        get_plugins_pipe,
125        register_plugin,
126        delete_plugin,
127        get_plugin_id,
128        get_plugin_version,
129        get_plugins,
130        get_plugin_user_id,
131        get_plugin_username,
132        get_plugin_attributes,
133    )
134    from ._users import (
135        get_users_pipe,
136        register_user,
137        get_user_id,
138        get_users,
139        edit_user,
140        delete_user,
141        get_user_password_hash,
142        get_user_type,
143        get_user_attributes,
144    )
145    from ._uri import from_uri, parse_uri
146    from ._instance import (
147        _log_temporary_tables_creation,
148        _drop_temporary_table,
149        _drop_temporary_tables,
150        _drop_old_temporary_tables,
151    )
152
    def __init__(
        self,
        label: Optional[str] = None,
        flavor: Optional[str] = None,
        wait: bool = False,
        connect: bool = False,
        debug: bool = False,
        **kw: Any
    ):
        """
        Build a SQL connector from a label, a flavor, and/or a `uri` keyword.

        Parameters
        ----------
        label: str, default 'main'
            The identifying label for the connector.
            E.g. for `sql:main`, 'main' is the label.
            Defaults to 'main'.
            NOTE(review): the signature default is `None`; the 'main' fallback is
            presumably applied by the base `Connector` — confirm.

        flavor: Optional[str], default None
            The database flavor, e.g.
            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
            To see supported flavors, run the `bootstrap connectors` command.

        wait: bool, default False
            If `True`, block until a database connection has been made.
            Defaults to `False`.

        connect: bool, default False
            If `True`, immediately attempt to connect the database and raise
            a warning if the connection fails.
            Defaults to `False`.

        debug: bool, default False
            Verbosity toggle.
            Defaults to `False`.

        kw: Any
            All other arguments will be passed to the connector's attributes.
            Therefore, a connector may be made without being registered,
            as long as enough parameters are supplied to the constructor.

        Raises
        ------
        ValueError
            If no flavor was provided and no `uri` attribute is available
            from which to parse one.
        """
        if 'uri' in kw:
            uri = kw['uri']
            ### Normalize the legacy `postgres` scheme to `postgresql`.
            if uri.startswith('postgres') and not uri.startswith('postgresql'):
                uri = uri.replace('postgres', 'postgresql', 1)
            ### Add the `psycopg` driver when no driver is specified.
            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
                uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
            ### The schemes below are rewritten to `postgresql+psycopg`,
            ### and the original scheme is remembered as the flavor.
            if uri.startswith('timescaledb://'):
                uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1)
                flavor = 'timescaledb'
            if uri.startswith('timescaledb-ha://'):
                uri = uri.replace('timescaledb-ha://', 'postgresql+psycopg://', 1)
                flavor = 'timescaledb-ha'
            if uri.startswith('postgis://'):
                uri = uri.replace('postgis://', 'postgresql+psycopg://', 1)
                flavor = 'postgis'
            kw['uri'] = uri
            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
            ### An explicit `label` argument wins over a label parsed from the URI.
            label = label or from_uri_params.get('label', None)
            _ = from_uri_params.pop('label', None)

            ### Sometimes the flavor may be provided with a URI.
            kw.update(from_uri_params)
            if flavor:
                kw['flavor'] = flavor

        ### set __dict__ in base class
        super().__init__(
            'sql',
            label = label or self.__dict__.get('label', None),
            **kw
        )

        ### File-based flavors: reset and re-set the attributes without inheriting defaults.
        if self.__dict__.get('flavor', None) in ('sqlite', 'geopackage'):
            self._reset_attributes()
            self._set_attributes(
                'sql',
                label = label,
                inherit_default = False,
                **kw
            )
            ### For backwards compatibility reasons, set the path for sql:local if it's missing.
            if (
                self.label == 'local'
                and self.__dict__.get('database', None) in (None, '{SQLITE_DB_PATH}')
            ):
                import meerschaum.config.paths as paths
                self.database = paths.SQLITE_DB_PATH.as_posix()

        ### ensure flavor and label are set accordingly
        if 'flavor' not in self.__dict__:
            if flavor is None and 'uri' not in self.__dict__:
                raise ValueError(
                    f"    Missing flavor. Provide flavor as a key for '{self}'."
                )
            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)

        ### Treat `postgres` as an alias for the `postgresql` flavor.
        if self.flavor == 'postgres':
            self.flavor = 'postgresql'

        self._debug = debug
        ### Store the PID and thread at initialization
        ### so we can dispose of the Pool in child processes or threads.
        import os
        import threading
        self._pid = os.getpid()
        self._thread_ident = threading.current_thread().ident
        self._sessions = {}
        self._locks = {'_sessions': threading.RLock(), }

        ### verify the flavor's requirements are met
        if self.flavor not in self.flavor_configs:
            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
        ### Required attributes are only verified when no URI was given.
        if not self.__dict__.get('uri'):
            self.verify_attributes(
                self.flavor_configs[self.flavor].get('requirements', set()),
                debug=debug,
            )

        if wait:
            from meerschaum.connectors.poll import retry_connect
            retry_connect(connector=self, debug=debug)

        if connect:
            if not self.test_connection(debug=debug):
                warn(f"Failed to connect with connector '{self}'!", stack=False)
278
279    @property
280    def Session(self):
281        if '_Session' not in self.__dict__:
282            if self.engine is None:
283                return None
284
285            from meerschaum.utils.packages import attempt_import
286            sqlalchemy_orm = attempt_import('sqlalchemy.orm', lazy=False)
287            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
288            self._Session = sqlalchemy_orm.scoped_session(session_factory)
289
290        return self._Session
291
292    @property
293    def engine(self):
294        """
295        Return the SQLAlchemy engine connected to the configured database.
296        """
297        import os
298        import threading
299        if '_engine' not in self.__dict__:
300            self._engine, self._engine_str = self.create_engine(include_uri=True)
301
302        same_process = os.getpid() == self._pid
303        same_thread = threading.current_thread().ident == self._thread_ident
304
305        ### handle child processes
306        if not same_process:
307            self._pid = os.getpid()
308            self._thread = threading.current_thread()
309            warn("Different PID detected. Disposing of connections...")
310            self._engine.dispose()
311
312        ### handle different threads
313        if not same_thread:
314            if self.flavor == 'duckdb':
315                warn("Different thread detected.")
316                self._engine.dispose()
317
318        return self._engine
319
320    @property
321    def DATABASE_URL(self) -> str:
322        """
323        Return the URI connection string (alias for `SQLConnector.URI`.
324        """
325        _ = self.engine
326        return str(self._engine_str)
327
328    @property
329    def URI(self) -> str:
330        """
331        Return the URI connection string.
332        """
333        _ = self.engine
334        return str(self._engine_str)
335
336    @property
337    def IS_THREAD_SAFE(self) -> str:
338        """
339        Return whether this connector may be multithreaded.
340        """
341        if self.flavor in ('duckdb', 'oracle'):
342            return False
343        if self.flavor in ('sqlite', 'geopackage'):
344            return ':memory:' not in self.URI
345        return True
346
347    @property
348    def metadata(self):
349        """
350        Return the metadata bound to this configured schema.
351        """
352        from meerschaum.utils.packages import attempt_import
353        sqlalchemy = attempt_import('sqlalchemy', lazy=False)
354        if '_metadata' not in self.__dict__:
355            self._metadata = sqlalchemy.MetaData(schema=self.schema)
356        return self._metadata
357
358    @property
359    def instance_schema(self):
360        """
361        Return the schema name for Meerschaum tables. 
362        """
363        return self.schema
364
365    @property
366    def internal_schema(self):
367        """
368        Return the schema name for internal tables. 
369        """
370        from meerschaum._internal.static import STATIC_CONFIG
371        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
372        schema_name = self.__dict__.get('internal_schema', None) or (
373            STATIC_CONFIG['sql']['internal_schema']
374            if self.flavor not in NO_SCHEMA_FLAVORS
375            else self.schema
376        )
377
378        if '_internal_schema' not in self.__dict__:
379            self._internal_schema = schema_name
380        return self._internal_schema
381
382    @property
383    def db(self) -> Optional[databases.Database]:
384        from meerschaum.utils.packages import attempt_import
385        databases = attempt_import('databases', lazy=False, install=True)
386        url = self.DATABASE_URL
387        if 'mysql' in url:
388            url = url.replace('+pymysql', '')
389        if '_db' not in self.__dict__:
390            try:
391                self._db = databases.Database(url)
392            except KeyError:
393                ### Likely encountered an unsupported flavor.
394                from meerschaum.utils.warnings import warn
395                self._db = None
396        return self._db
397
398    @property
399    def db_version(self) -> Union[str, None]:
400        """
401        Return the database version.
402        """
403        _db_version = self.__dict__.get('_db_version', None)
404        if _db_version is not None:
405            return _db_version
406
407        from meerschaum.utils.sql import get_db_version
408        self._db_version = get_db_version(self)
409        return self._db_version
410
411    @property
412    def schema(self) -> Union[str, None]:
413        """
414        Return the default schema to use.
415        A value of `None` will not prepend a schema.
416        """
417        if 'schema' in self.__dict__:
418            return self.__dict__['schema']
419
420        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
421        if self.flavor in NO_SCHEMA_FLAVORS:
422            self.__dict__['schema'] = None
423            return None
424
425        sqlalchemy = mrsm.attempt_import('sqlalchemy', lazy=False)
426        _schema = sqlalchemy.inspect(self.engine).default_schema_name
427        self.__dict__['schema'] = _schema
428        return _schema
429
430    def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path:
431        """
432        Return the path to the file to which to write metadata cache.
433        """
434        import meerschaum.config.paths as paths
435        filename = (
436            f'{self.label}-metadata.pkl'
437            if kind == 'pkl'
438            else f'{self.label}.json'
439        )
440        return paths.SQL_CONN_CACHE_RESOURCES_PATH / filename
441
442    def __getstate__(self):
443        return self.__dict__
444
445    def __setstate__(self, d):
446        self.__dict__.update(d)
447
    def __call__(self):
        """
        Return this connector itself when the instance is called.
        """
        return self

Connect to SQL databases via sqlalchemy.

SQLConnectors may be used as Meerschaum instance connectors. Read more about connectors and instances at https://meerschaum.io/reference/connectors/

SQLConnector( label: Optional[str] = None, flavor: Optional[str] = None, wait: bool = False, connect: bool = False, debug: bool = False, **kw: Any)
    def __init__(
        self,
        label: Optional[str] = None,
        flavor: Optional[str] = None,
        wait: bool = False,
        connect: bool = False,
        debug: bool = False,
        **kw: Any
    ):
        """
        Build a SQL connector from a label, a flavor, and/or a `uri` keyword argument.

        Parameters
        ----------
        label: str, default 'main'
            The identifying label for the connector.
            E.g. for `sql:main`, 'main' is the label.
            Defaults to 'main'.

        flavor: Optional[str], default None
            The database flavor, e.g.
            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
            To see supported flavors, run the `bootstrap connectors` command.

        wait: bool, default False
            If `True`, block until a database connection has been made.
            Defaults to `False`.

        connect: bool, default False
            If `True`, immediately attempt to connect the database and raise
            a warning if the connection fails.
            Defaults to `False`.

        debug: bool, default False
            Verbosity toggle.
            Defaults to `False`.

        kw: Any
            All other arguments will be passed to the connector's attributes.
            Therefore, a connector may be made without being registered,
            as long as enough parameters are supplied to the constructor.

        Raises
        ------
        ValueError
            If no flavor is set and neither `flavor` nor a `uri` attribute is available.
        """
        if 'uri' in kw:
            uri = kw['uri']
            ### Normalize the URI scheme: rename `postgres` to `postgresql`
            ### and add the `psycopg` driver when no driver is given.
            if uri.startswith('postgres') and not uri.startswith('postgresql'):
                uri = uri.replace('postgres', 'postgresql', 1)
            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
                uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
            ### Flavor-named schemes are rewritten to `postgresql+psycopg://`,
            ### and the flavor is remembered so it can be set explicitly below.
            if uri.startswith('timescaledb://'):
                uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1)
                flavor = 'timescaledb'
            if uri.startswith('timescaledb-ha://'):
                uri = uri.replace('timescaledb-ha://', 'postgresql+psycopg://', 1)
                flavor = 'timescaledb-ha'
            if uri.startswith('postgis://'):
                uri = uri.replace('postgis://', 'postgresql+psycopg://', 1)
                flavor = 'postgis'
            kw['uri'] = uri
            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
            ### An explicitly passed label takes precedence over the one parsed from the URI.
            label = label or from_uri_params.get('label', None)
            _ = from_uri_params.pop('label', None)

            ### Sometimes the flavor may be provided with a URI.
            kw.update(from_uri_params)
            if flavor:
                kw['flavor'] = flavor

        ### set __dict__ in base class
        super().__init__(
            'sql',
            label = label or self.__dict__.get('label', None),
            **kw
        )

        if self.__dict__.get('flavor', None) in ('sqlite', 'geopackage'):
            ### Rebuild the attributes with `inherit_default=False`
            ### (presumably so file-based flavors do not pick up the default connector's keys).
            self._reset_attributes()
            self._set_attributes(
                'sql',
                label = label,
                inherit_default = False,
                **kw
            )
            ### For backwards compatibility reasons, set the path for sql:local if it's missing.
            if (
                self.label == 'local'
                and self.__dict__.get('database', None) in (None, '{SQLITE_DB_PATH}')
            ):
                import meerschaum.config.paths as paths
                self.database = paths.SQLITE_DB_PATH.as_posix()

        ### ensure flavor and label are set accordingly
        if 'flavor' not in self.__dict__:
            if flavor is None and 'uri' not in self.__dict__:
                raise ValueError(
                    f"    Missing flavor. Provide flavor as a key for '{self}'."
                )
            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)

        ### `postgres` is accepted as an alias for `postgresql`.
        if self.flavor == 'postgres':
            self.flavor = 'postgresql'

        self._debug = debug
        ### Store the PID and thread at initialization
        ### so we can dispose of the Pool in child processes or threads.
        import os
        import threading
        self._pid = os.getpid()
        self._thread_ident = threading.current_thread().ident
        self._sessions = {}
        self._locks = {'_sessions': threading.RLock(), }

        ### verify the flavor's requirements are met
        if self.flavor not in self.flavor_configs:
            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
        ### Required attributes are only verified when no URI was provided.
        if not self.__dict__.get('uri'):
            self.verify_attributes(
                self.flavor_configs[self.flavor].get('requirements', set()),
                debug=debug,
            )

        if wait:
            from meerschaum.connectors.poll import retry_connect
            retry_connect(connector=self, debug=debug)

        if connect:
            if not self.test_connection(debug=debug):
                warn(f"Failed to connect with connector '{self}'!", stack=False)
Parameters
  • label (str, default 'main'): The identifying label for the connector. E.g. for sql:main, 'main' is the label. Defaults to 'main'.
  • flavor (Optional[str], default None): The database flavor, e.g. 'sqlite', 'postgresql', 'cockroachdb', etc. To see supported flavors, run the bootstrap connectors command.
  • wait (bool, default False): If True, block until a database connection has been made. Defaults to False.
  • connect (bool, default False): If True, immediately attempt to connect the database and raise a warning if the connection fails. Defaults to False.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
  • kw (Any): All other arguments will be passed to the connector's attributes. Therefore, a connector may be made without being registered, as long as enough parameters are supplied to the constructor.
Session
279    @property
280    def Session(self):
281        if '_Session' not in self.__dict__:
282            if self.engine is None:
283                return None
284
285            from meerschaum.utils.packages import attempt_import
286            sqlalchemy_orm = attempt_import('sqlalchemy.orm', lazy=False)
287            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
288            self._Session = sqlalchemy_orm.scoped_session(session_factory)
289
290        return self._Session
engine
292    @property
293    def engine(self):
294        """
295        Return the SQLAlchemy engine connected to the configured database.
296        """
297        import os
298        import threading
299        if '_engine' not in self.__dict__:
300            self._engine, self._engine_str = self.create_engine(include_uri=True)
301
302        same_process = os.getpid() == self._pid
303        same_thread = threading.current_thread().ident == self._thread_ident
304
305        ### handle child processes
306        if not same_process:
307            self._pid = os.getpid()
308            self._thread = threading.current_thread()
309            warn("Different PID detected. Disposing of connections...")
310            self._engine.dispose()
311
312        ### handle different threads
313        if not same_thread:
314            if self.flavor == 'duckdb':
315                warn("Different thread detected.")
316                self._engine.dispose()
317
318        return self._engine

Return the SQLAlchemy engine connected to the configured database.

DATABASE_URL: str
320    @property
321    def DATABASE_URL(self) -> str:
322        """
323        Return the URI connection string (alias for `SQLConnector.URI`.
324        """
325        _ = self.engine
326        return str(self._engine_str)

Return the URI connection string (alias for SQLConnector.URI).

URI: str
328    @property
329    def URI(self) -> str:
330        """
331        Return the URI connection string.
332        """
333        _ = self.engine
334        return str(self._engine_str)

Return the URI connection string.

IS_THREAD_SAFE: str
336    @property
337    def IS_THREAD_SAFE(self) -> str:
338        """
339        Return whether this connector may be multithreaded.
340        """
341        if self.flavor in ('duckdb', 'oracle'):
342            return False
343        if self.flavor in ('sqlite', 'geopackage'):
344            return ':memory:' not in self.URI
345        return True

Return whether this connector may be multithreaded.

metadata
347    @property
348    def metadata(self):
349        """
350        Return the metadata bound to this configured schema.
351        """
352        from meerschaum.utils.packages import attempt_import
353        sqlalchemy = attempt_import('sqlalchemy', lazy=False)
354        if '_metadata' not in self.__dict__:
355            self._metadata = sqlalchemy.MetaData(schema=self.schema)
356        return self._metadata

Return the metadata bound to this configured schema.

instance_schema
358    @property
359    def instance_schema(self):
360        """
361        Return the schema name for Meerschaum tables. 
362        """
363        return self.schema

Return the schema name for Meerschaum tables.

internal_schema
365    @property
366    def internal_schema(self):
367        """
368        Return the schema name for internal tables. 
369        """
370        from meerschaum._internal.static import STATIC_CONFIG
371        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
372        schema_name = self.__dict__.get('internal_schema', None) or (
373            STATIC_CONFIG['sql']['internal_schema']
374            if self.flavor not in NO_SCHEMA_FLAVORS
375            else self.schema
376        )
377
378        if '_internal_schema' not in self.__dict__:
379            self._internal_schema = schema_name
380        return self._internal_schema

Return the schema name for internal tables.

db: 'Optional[databases.Database]'
382    @property
383    def db(self) -> Optional[databases.Database]:
384        from meerschaum.utils.packages import attempt_import
385        databases = attempt_import('databases', lazy=False, install=True)
386        url = self.DATABASE_URL
387        if 'mysql' in url:
388            url = url.replace('+pymysql', '')
389        if '_db' not in self.__dict__:
390            try:
391                self._db = databases.Database(url)
392            except KeyError:
393                ### Likely encountered an unsupported flavor.
394                from meerschaum.utils.warnings import warn
395                self._db = None
396        return self._db
db_version: Optional[str]
398    @property
399    def db_version(self) -> Union[str, None]:
400        """
401        Return the database version.
402        """
403        _db_version = self.__dict__.get('_db_version', None)
404        if _db_version is not None:
405            return _db_version
406
407        from meerschaum.utils.sql import get_db_version
408        self._db_version = get_db_version(self)
409        return self._db_version

Return the database version.

schema: Optional[str]
411    @property
412    def schema(self) -> Union[str, None]:
413        """
414        Return the default schema to use.
415        A value of `None` will not prepend a schema.
416        """
417        if 'schema' in self.__dict__:
418            return self.__dict__['schema']
419
420        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
421        if self.flavor in NO_SCHEMA_FLAVORS:
422            self.__dict__['schema'] = None
423            return None
424
425        sqlalchemy = mrsm.attempt_import('sqlalchemy', lazy=False)
426        _schema = sqlalchemy.inspect(self.engine).default_schema_name
427        self.__dict__['schema'] = _schema
428        return _schema

Return the default schema to use. A value of None will not prepend a schema.

def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path:
430    def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path:
431        """
432        Return the path to the file to which to write metadata cache.
433        """
434        import meerschaum.config.paths as paths
435        filename = (
436            f'{self.label}-metadata.pkl'
437            if kind == 'pkl'
438            else f'{self.label}.json'
439        )
440        return paths.SQL_CONN_CACHE_RESOURCES_PATH / filename

Return the path to the file to which to write metadata cache.

flavor_configs = {'timescaledb': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 5432}}, 'timescaledb-ha': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 5432}}, 'postgresql': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 5432}}, 'postgis': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 5432}}, 'citus': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 5432}}, 'mssql': {'engine': 'mssql+pyodbc', 'create_engine': {'fast_executemany': True, 'use_insertmanyvalues': False, 'isolation_level': 'AUTOCOMMIT', 'use_setinputsizes': False, 'pool_pre_ping': True, 'ignore_no_transaction_on_rollback': True}, 'omit_create_engine': {'method'}, 'to_sql': {'method': None}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 1433, 'options': 'driver=ODBC Driver 18 for SQL Server&UseFMTONLY=Yes&TrustServerCertificate=yes&Encrypt=no&MARS_Connection=yes'}}, 'mysql': {'engine': 'mysql+pymysql', 'create_engine': 
{'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 3306}}, 'mariadb': {'engine': 'mysql+pymysql', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 3306}}, 'oracle': {'engine': 'oracle+oracledb', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': None}, 'requirements': {'username', 'password', 'database', 'host'}, 'defaults': {'port': 1521}}, 'sqlite': {'engine': 'sqlite', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'database'}, 'defaults': {}}, 'geopackage': {'engine': 'sqlite', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'database'}, 'defaults': {}}, 'duckdb': {'engine': 'duckdb', 'create_engine': {}, 'omit_create_engine': {'ALL'}, 'to_sql': {'method': 'multi'}, 'requirements': '', 'defaults': {}}, 'cockroachdb': {'engine': 'cockroachdb', 'omit_create_engine': {'method'}, 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'to_sql': {'method': 'multi'}, 'requirements': {'host'}, 'defaults': {'port': 26257, 'database': 'defaultdb', 'username': 'root', 'password': 'admin'}}}
def create_engine( self, include_uri: bool = False, debug: bool = False, **kw) -> 'sqlalchemy.engine.Engine':
 45def create_engine(
 46    self,
 47    include_uri: bool = False,
 48    debug: bool = False,
 49    **kw
 50) -> 'sqlalchemy.engine.Engine':
 51    """Create a sqlalchemy engine by building the engine string."""
 52    from meerschaum.utils.packages import attempt_import
 53    from meerschaum.utils.warnings import error, warn
 54    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
 55    import urllib
 56    import copy
 57    ### Install and patch required drivers.
 58    if self.flavor in install_flavor_drivers:
 59        _ = attempt_import(
 60            *install_flavor_drivers[self.flavor],
 61            debug=debug,
 62            lazy=False,
 63            warn=False,
 64        )
 65        if self.flavor == 'mssql':
 66            _init_mssql_sqlalchemy()
 67
 68    ### supplement missing values with defaults (e.g. port number)
 69    for a, value in flavor_configs[self.flavor]['defaults'].items():
 70        if a not in self.__dict__:
 71            self.__dict__[a] = value
 72
 73    ### Verify that everything is in order.
 74    if self.flavor not in flavor_configs:
 75        error(f"Cannot create a connector with the flavor '{self.flavor}'.")
 76
 77    _engine = flavor_configs[self.flavor].get('engine', None)
 78    _username = self.__dict__.get('username', None)
 79    _password = self.__dict__.get('password', None)
 80    _host = self.__dict__.get('host', None)
 81    _port = self.__dict__.get('port', None)
 82    _database = self.__dict__.get('database', None)
 83    if _database == '{SQLITE_DB_PATH}':
 84        import meerschaum.config.paths as paths
 85        _database = paths.SQLITE_DB_PATH.as_posix()
 86    _options = self.__dict__.get('options', {})
 87    if isinstance(_options, str):
 88        _options = dict(urllib.parse.parse_qsl(_options))
 89    _uri = self.__dict__.get('uri', None)
 90
 91    ### Handle registering specific dialects (due to installing in virtual environments).
 92    if self.flavor in flavor_dialects:
 93        sqlalchemy.dialects.registry.register(*flavor_dialects[self.flavor])
 94
 95    ### self._sys_config was deepcopied and can be updated safely
 96    if self.flavor in ("sqlite", "duckdb", "geopackage"):
 97        engine_str = f"{_engine}:///{_database}" if not _uri else _uri
 98        if 'create_engine' not in self._sys_config:
 99            self._sys_config['create_engine'] = {}
100        if 'connect_args' not in self._sys_config['create_engine']:
101            self._sys_config['create_engine']['connect_args'] = {}
102        self._sys_config['create_engine']['connect_args'].update({"check_same_thread": False})
103    else:
104        engine_str = (
105            _engine + "://" + (_username if _username is not None else '') +
106            ((":" + urllib.parse.quote_plus(_password)) if _password is not None else '') +
107            "@" + _host + ((":" + str(_port)) if _port is not None else '') +
108            (("/" + _database) if _database is not None else '')
109            + (("?" + urllib.parse.urlencode(_options)) if _options else '')
110        ) if not _uri else _uri
111
112        ### Sometimes the timescaledb:// flavor can slip in.
113        if _uri and self.flavor in _uri:
114            if self.flavor in ('timescaledb', 'timescaledb-ha', 'postgis'):
115                engine_str = engine_str.replace(self.flavor, 'postgresql', 1)
116            elif _uri.startswith('postgresql://'):
117                engine_str = engine_str.replace('postgresql://', 'postgresql+psycopg2://')
118
119    if debug:
120        dprint(
121            (
122                (engine_str.replace(':' + _password, ':' + ('*' * len(_password))))
123                    if _password is not None else engine_str
124            ) + '\n' + f"{self._sys_config.get('create_engine', {}).get('connect_args', {})}"
125        )
126
127    _kw_copy = copy.deepcopy(kw)
128
129    ### NOTE: Order of inheritance:
130    ###       1. Defaults
131    ###       2. System configuration
132    ###       3. Connector configuration
133    ###       4. Keyword arguments
134    _create_engine_args = flavor_configs.get(self.flavor, {}).get('create_engine', {})
135    def _apply_create_engine_args(update):
136        if 'ALL' not in flavor_configs[self.flavor].get('omit_create_engine', {}):
137            _create_engine_args.update(
138                { k: v for k, v in update.items()
139                    if 'omit_create_engine' not in flavor_configs[self.flavor]
140                        or k not in flavor_configs[self.flavor].get('omit_create_engine')
141                }
142            )
143    _apply_create_engine_args(self._sys_config.get('create_engine', {}))
144    _apply_create_engine_args(self.__dict__.get('create_engine', {}))
145    _apply_create_engine_args(_kw_copy)
146
147    try:
148        engine = sqlalchemy.create_engine(
149            engine_str,
150            ### I know this looks confusing, and maybe it's bad code,
151            ### but it's simple. It dynamically parses the config string
152            ### and splits it to separate the class name (QueuePool)
153            ### from the module name (sqlalchemy.pool).
154            poolclass    = getattr(
155                attempt_import(
156                    ".".join(self._sys_config['poolclass'].split('.')[:-1])
157                ),
158                self._sys_config['poolclass'].split('.')[-1]
159            ),
160            echo         = debug,
161            **_create_engine_args
162        )
163    except Exception:
164        warn(f"Failed to create connector '{self}':\n{traceback.format_exc()}", stack=False)
165        engine = None
166
167    if include_uri:
168        return engine, engine_str
169    return engine

Create a sqlalchemy engine by building the engine string.

def read( self, query_or_table: 'Union[str, sqlalchemy.Query]', params: Union[Dict[str, Any], List[str], NoneType] = None, dtype: Optional[Dict[str, Any]] = None, coerce_float: bool = True, chunksize: Optional[int] = -1, workers: Optional[int] = None, chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None, as_hook_results: bool = False, chunks: Optional[int] = None, schema: Optional[str] = None, as_chunks: bool = False, as_iterator: bool = False, as_dask: bool = False, index_col: Optional[str] = None, silent: bool = False, debug: bool = False, **kw: Any) -> 'Union[pandas.DataFrame, dask.DataFrame, List[pandas.DataFrame], List[Any], None]':
def read(
    self,
    query_or_table: Union[str, sqlalchemy.Query],
    params: Union[Dict[str, Any], List[str], None] = None,
    dtype: Optional[Dict[str, Any]] = None,
    coerce_float: bool = True,
    chunksize: Optional[int] = -1,
    workers: Optional[int] = None,
    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None,
    as_hook_results: bool = False,
    chunks: Optional[int] = None,
    schema: Optional[str] = None,
    as_chunks: bool = False,
    as_iterator: bool = False,
    as_dask: bool = False,
    index_col: Optional[str] = None,
    silent: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union[
    pandas.DataFrame,
    dask.DataFrame,
    List[pandas.DataFrame],
    List[Any],
    None,
]:
    """
    Read a SQL query or table into a pandas dataframe.

    Parameters
    ----------
    query_or_table: Union[str, sqlalchemy.Query]
        The SQL query (sqlalchemy Query or string) or name of the table from which to select.
        A value without spaces (or a double-quoted value containing spaces) is treated
        as a table name and read with `SELECT * FROM <table>`.

    params: Union[Dict[str, Any], List[str], None], default None
        `List` or `Dict` of parameters to pass to `pandas.read_sql_query()`.
        See the pandas documentation for more information:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html

    dtype: Optional[Dict[str, Any]], default None
        A dictionary of data types to pass to `pandas.read_sql_query()`.
        See the pandas documentation for more information:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html

    coerce_float: bool, default True
        Passed through to `pandas.read_sql_query()`.

    chunksize: Optional[int], default -1
        How many rows to read per chunk (passed as `chunksize` to `pandas.read_sql_query()`).
        `None` will read everything in one large chunk.
        `-1` (default) uses the `chunksize` value from the connector's system configuration.

        **NOTE:** DuckDB does not allow for chunking.

    workers: Optional[int], default None
        How many threads to use when consuming the generator.
        Only applies if `chunk_hook` is provided.

    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]], default None
        Hook function to execute once per chunk, e.g. writing and reading chunks intermittently.
        See `--sync-chunks` for an example.
        **NOTE:** `as_iterator` MUST be False (default).

    as_hook_results: bool, default False
        If `True`, return a `List` of the outputs of the hook function.
        Only applicable if `chunk_hook` is not None.

        **NOTE:** `as_iterator` MUST be `False` (default).

    chunks: Optional[int], default None
        Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and
        return into a single dataframe.
        For example, to limit the returned dataframe to 100,000 rows,
        you could specify a `chunksize` of `1000` and `chunks` of `100`.
        A value less than or equal to `0` returns an empty list immediately.

    schema: Optional[str], default None
        If just a table name is provided, optionally specify the table schema.
        Defaults to `SQLConnector.schema`.

    as_chunks: bool, default False
        If `True`, return a list of DataFrames.
        Otherwise return a single DataFrame.

    as_iterator: bool, default False
        If `True`, return the pandas DataFrame iterator.
        `chunksize` must not be `None` (falls back to 1000 if so),
        and hooks are not called in this case.

    as_dask: bool, default False
        Currently unused by this function: `dd` is set to `None`,
        so the Dask code paths below are skipped.

    index_col: Optional[str], default None
        If using Dask, use this column as the index column.
        If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
        The value is also passed as `index_col` to `pandas.read_sql_query()`.

    silent: bool, default False
        If `True`, don't raise warnings in case of errors.
        Defaults to `False`.

    debug: bool, default False
        If `True`, print the query, the chunksize and the elapsed time.

    **kw: Any
        Additional keyword arguments offered to `chunk_hook` (via `filter_arguments()`).

    Returns
    -------
    A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators,
    or `None` if something breaks.
    An empty list is returned if `chunks` is not `None` and `chunks <= 0`.

    """
    ### Nothing to read if the caller asked for zero (or fewer) chunks.
    if chunks is not None and chunks <= 0:
        return []

    from meerschaum.utils.sql import sql_item_name, truncate_item_name
    from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
    from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS
    from meerschaum.utils.packages import attempt_import, import_pandas
    from meerschaum.utils.pool import get_pool
    from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
    from meerschaum.utils.misc import filter_arguments
    import warnings
    import traceback
    from decimal import Decimal

    pd = import_pandas()
    dd = None

    ### NOTE: `dd` is `None`, so the second assignment to `is_dask` is always `False`
    ###       and overrides the first. The Dask branches below are therefore not taken.
    ### NOTE(review): `pandas` and `npartitions` are not referenced again in this function.
    is_dask = 'dask' in pd.__name__
    pandas = attempt_import('pandas')
    is_dask = dd is not None
    npartitions = chunksize_to_npartitions(chunksize)
    if is_dask:
        chunksize = None

    schema = schema or self.schema
    ### Collect the columns whose requested dtype is a UTC datetime.
    utc_dt_cols = [
        col
        for col, typ in dtype.items()
        if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
    ] if dtype else []

    ### For timezone-naive flavors, read these columns as naive datetimes
    ### (on a copy so the caller's `dtype` dictionary is not mutated);
    ### the timezone is coerced per chunk in `_process_chunk()`.
    if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
        dtype = dtype.copy()
        for col in utc_dt_cols:
            dtype[col] = 'datetime64[us]'

    pool = get_pool(workers=workers)
    sqlalchemy = attempt_import("sqlalchemy", lazy=False)
    ### Resolve the `-1` sentinel to the configured default chunksize.
    default_chunksize = self._sys_config.get('chunksize', None)
    chunksize = chunksize if chunksize != -1 else default_chunksize
    if chunksize is None and as_iterator:
        if not silent and self.flavor not in _disallow_chunks_flavors:
            warn(
                "An iterator may only be generated if chunksize is not None.\n"
                + "Falling back to a chunksize of 1000.", stacklevel=3,
            )
        chunksize = 1000
    ### Clamp the chunksize to the flavor's maximum
    ### (only warn if the caller explicitly chose the oversized value).
    if chunksize is not None and self.flavor in _max_chunks_flavors:
        if chunksize > _max_chunks_flavors[self.flavor]:
            if chunksize != default_chunksize:
                warn(
                    f"The specified chunksize of {chunksize} exceeds the maximum of "
                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
                    stacklevel=3,
                )
            chunksize = _max_chunks_flavors[self.flavor]

    ### Flavors which disallow chunking always read everything at once.
    if chunksize is not None and self.flavor in _disallow_chunks_flavors:
        chunksize = None

    if debug:
        import time
        start = time.perf_counter()
        dprint(f"[{self}]\n{query_or_table}")
        dprint(f"[{self}] Fetching with chunksize: {chunksize}")

    ### This might be sqlalchemy object or the string of a table name.
    ### We check for spaces and quotes to see if it might be a weird table.
    if (
        ' ' not in str(query_or_table)
        or (
            ' ' in str(query_or_table)
            and str(query_or_table).startswith('"')
            and str(query_or_table).endswith('"')
        )
    ):
        truncated_table_name = truncate_item_name(str(query_or_table), self.flavor)
        if truncated_table_name != str(query_or_table) and not silent:
            if self.flavor not in ('oracle', 'mysql', 'mariadb'):
                warn(
                    f"Table '{query_or_table}' is too long for '{self.flavor}',"
                    + f" will instead read the table '{truncated_table_name}'."
                )

        query_or_table = sql_item_name(str(query_or_table), self.flavor, schema)
        if debug:
            dprint(f"[{self}] Reading from table {query_or_table}")
        ### NOTE: this `formatted_query` is overwritten unconditionally below.
        formatted_query = sqlalchemy.text("SELECT * FROM " + str(query_or_table))
        str_query = f"SELECT * FROM {query_or_table}"
    else:
        str_query = query_or_table

    ### Wrap plain strings in `sqlalchemy.text()`;
    ### anything else goes through `format_sql_query_for_dask()`.
    formatted_query = (
        sqlalchemy.text(str_query)
        if not is_dask and isinstance(str_query, str)
        else format_sql_query_for_dask(str_query)
    )

    def _get_chunk_args_kwargs(_chunk):
        ### Build the positional and keyword arguments to call `chunk_hook` with.
        return filter_arguments(
            chunk_hook,
            _chunk,
            workers=workers,
            chunksize=chunksize,
            debug=debug,
            **kw
        )

    chunk_list = []
    chunk_hook_results = []
    def _process_chunk(_chunk, _retry_on_failure: bool = True):
        ### Coerce timezones, collect the chunk (unless only hook results are wanted),
        ### and return the hook's result (`None` if there is no hook).
        if self.flavor in TIMEZONE_NAIVE_FLAVORS:
            for col in utc_dt_cols:
                _chunk[col] = coerce_timezone(_chunk[col], strip_utc=False)
        if not as_hook_results:
            chunk_list.append(_chunk)

        if chunk_hook is None:
            return None

        chunk_args, chunk_kwargs = _get_chunk_args_kwargs(_chunk)

        result = None
        try:
            result = chunk_hook(*chunk_args, **chunk_kwargs)
        except Exception:
            ### Represent a raised exception as a failed `(success, message)` tuple.
            result = False, traceback.format_exc()
            from meerschaum.utils.formatting import get_console
            if not silent:
                get_console().print_exception()

        ### If the chunk fails to process, try it again one more time.
        if isinstance(result, tuple) and result[0] is False:
            if _retry_on_failure:
                return _process_chunk(_chunk, _retry_on_failure=False)

        return result

    try:
        ### Only stream results when a hook consumes the chunks
        ### and the iterator itself is not being returned.
        stream_results = not as_iterator and chunk_hook is not None and chunksize is not None
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'case sensitivity issues')

            read_sql_query_kwargs = {
                'params': params,
                'dtype': dtype,
                'coerce_float': coerce_float,
                'index_col': index_col,
            }
            if is_dask:
                if index_col is None:
                    dd = None
                    pd = attempt_import('pandas')
                    read_sql_query_kwargs.update({
                        'chunksize': chunksize,
                    })
            else:
                read_sql_query_kwargs.update({
                    'chunksize': chunksize,
                })

            if is_dask and dd is not None:
                ddf = dd.read_sql_query(
                    formatted_query,
                    self.URI,
                    **read_sql_query_kwargs
                )
            else:

                def get_chunk_generator(connectable):
                    ### Run the query and decide what (if anything) may be returned right away.
                    chunk_generator = pd.read_sql_query(
                        formatted_query,
                        connectable, # NOTE: test this against `self.engine`.
                        **read_sql_query_kwargs
                    )

                    ### `to_return` is:
                    ###   - when `as_iterator` or `chunksize is None`:
                    ###       the raw `chunk_generator`, unless `as_hook_results`
                    ###       or `chunksize is None`, in which case a lazy generator
                    ###       of `_process_chunk()` results;
                    ###   - otherwise, when `as_hook_results`:
                    ###       the list of `_process_chunk()` results mapped over the pool;
                    ###   - otherwise `None`, and the chunks are consumed further below.
                    to_return = (
                        (
                            chunk_generator
                            if not (as_hook_results or chunksize is None)
                            else (
                                _process_chunk(_chunk)
                                for _chunk in chunk_generator
                            )
                        )
                        if as_iterator or chunksize is None
                        else (
                            list(pool.imap(_process_chunk, chunk_generator))
                            if as_hook_results
                            else None
                        )
                    )
                    return chunk_generator, to_return

                if self.flavor in SKIP_READ_TRANSACTION_FLAVORS:
                    chunk_generator, to_return = get_chunk_generator(self.engine)
                else:
                    with self.engine.begin() as transaction:
                        with transaction.execution_options(
                            stream_results=stream_results,
                        ) as connection:
                            chunk_generator, to_return = get_chunk_generator(connection)

                ### A non-`None` value is the final result.
                if to_return is not None:
                    return to_return

    except Exception as e:
        if debug:
            dprint(f"[{self}] Failed to execute query:\n\n{query_or_table}\n\n")
        if not silent:
            warn(str(e), stacklevel=3)
        from meerschaum.utils.formatting import get_console
        if not silent:
            get_console().print_exception()

        return None

    if is_dask and dd is not None:
        ddf = ddf.reset_index()
        return ddf

    ### NOTE: these names are rebound to fresh objects here,
    ###       discarding anything collected by `_process_chunk()` above.
    chunk_list = []
    read_chunks = 0
    chunk_hook_results = []
    if chunksize is None:
        chunk_list.append(chunk_generator)
    elif as_iterator:
        return chunk_generator
    else:
        ### Consume the generator, calling the hook on each chunk
        ### and stopping early once `chunks` chunks have been read.
        try:
            for chunk in chunk_generator:
                if chunk_hook is not None:
                    chunk_args, chunk_kwargs = _get_chunk_args_kwargs(chunk)
                    chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs))
                chunk_list.append(chunk)
                read_chunks += 1
                if chunks is not None and read_chunks >= chunks:
                    break
        except Exception as e:
            ### NOTE(review): this warning is emitted even when `silent` is `True`,
            ###               and execution continues after the failure.
            warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3)
            from meerschaum.utils.formatting import get_console
            if not silent:
                get_console().print_exception()

    ### NOTE(review): this loop repeats the one above with a reset counter.
    ###               If `chunk_generator` was already exhausted above, it yields
    ###               nothing here; after an early `break` it keeps reading — confirm intent.
    read_chunks = 0
    try:
        for chunk in chunk_generator:
            if chunk_hook is not None:
                chunk_args, chunk_kwargs = _get_chunk_args_kwargs(chunk)
                chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs))
            chunk_list.append(chunk)
            read_chunks += 1
            if chunks is not None and read_chunks >= chunks:
                break
    except Exception as e:
        warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3)
        from meerschaum.utils.formatting import get_console
        if not silent:
            get_console().print_exception()

        return None

    ### If no chunks returned, read without chunks
    ### to get columns
    if len(chunk_list) == 0:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'case sensitivity issues')
            _ = read_sql_query_kwargs.pop('chunksize', None)
            with self.engine.begin() as connection:
                chunk_list.append(
                    pd.read_sql_query(
                        formatted_query,
                        connection,
                        **read_sql_query_kwargs
                    )
                )

    ### call the hook on any missed chunks.
    if chunk_hook is not None and len(chunk_list) > len(chunk_hook_results):
        for c in chunk_list[len(chunk_hook_results):]:
            chunk_args, chunk_kwargs = _get_chunk_args_kwargs(c)
            chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs))

    ### Report how long the fetch took (`start` was set in the `debug` block above).
    if debug:
        end = time.perf_counter()
        dprint(f"Fetched {len(chunk_list)} chunks in {round(end - start, 2)} seconds.")

    if as_hook_results:
        return chunk_hook_results

    ### Skip `pd.concat()` if `as_chunks` is specified.
    if as_chunks:
        for c in chunk_list:
            c.reset_index(drop=True, inplace=True)
            for col in get_numeric_cols(c):
                c[col] = c[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)
        return chunk_list

    df = pd.concat(chunk_list).reset_index(drop=True)
    ### NOTE: The calls to `canonical()` are to drop leading and trailing zeroes.
    for col in get_numeric_cols(df):
        df[col] = df[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)

    return df

Read a SQL query or table into a pandas dataframe.

Parameters
  • query_or_table (Union[str, sqlalchemy.Query]): The SQL query (sqlalchemy Query or string) or name of the table from which to select.
  • params (Union[Dict[str, Any], List[str], None], default None): List or Dict of parameters to pass to pandas.read_sql(). See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
  • dtype (Optional[Dict[str, Any]], default None): A dictionary of data types to pass to pandas.read_sql(). See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
  • chunksize (Optional[int], default -1): How many rows to read per chunk. None will read everything in one large chunk. Defaults to system configuration.

    NOTE: DuckDB does not allow for chunking.

  • workers (Optional[int], default None): How many threads to use when consuming the generator. Only applies if chunk_hook is provided.
  • chunk_hook (Optional[Callable[[pandas.DataFrame], Any]], default None): Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. See --sync-chunks for an example. NOTE: as_iterator MUST be False (default).
  • as_hook_results (bool, default False): If True, return a List of the outputs of the hook function. Only applicable if chunk_hook is not None.

    NOTE: as_iterator MUST be False (default).

  • chunks (Optional[int], default None): Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and return into a single dataframe. For example, to limit the returned dataframe to 100,000 rows, you could specify a chunksize of 1000 and chunks of 100.
  • schema (Optional[str], default None): If just a table name is provided, optionally specify the table schema. Defaults to SQLConnector.schema.
  • as_chunks (bool, default False): If True, return a list of DataFrames. Otherwise return a single DataFrame.
  • as_iterator (bool, default False): If True, return the pandas DataFrame iterator. chunksize must not be None (falls back to 1000 if so), and hooks are not called in this case.
  • index_col (Optional[str], default None): If using Dask, use this column as the index column. If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
  • silent (bool, default False): If True, don't raise warnings in case of errors. Defaults to False.
Returns
  • A pd.DataFrame (default case), or an iterator, or a list of dataframes / iterators, or None if something breaks.
def value(self, query: str, *args: Any, use_pandas: bool = False, **kw: Any) -> Any:
def value(
    self,
    query: str,
    *args: Any,
    use_pandas: bool = False,
    **kw: Any
) -> Any:
    """
    Execute the provided query and return the first value.

    Parameters
    ----------
    query: str
        The SQL query to execute.

    *args: Any
        The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec`
        if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.

    use_pandas: bool, default False
        If `True`, use `meerschaum.connectors.SQLConnector.read`, otherwise use
        `meerschaum.connectors.sql.SQLConnector.exec` (default).
        **NOTE:** This is always `True` for DuckDB.

    **kw: Any
        See `args`.
        When `exec` is used, the keys `close` (default `True`) and `commit`
        (default `True` unless the flavor is `'mssql'`) are consumed here:
        `commit` is forwarded to `exec`, and `close` determines whether
        the connection is closed after the value has been fetched.

    Returns
    -------
    Any value returned from the query,
    or `None` if the query fails or returns no rows.

    """
    if self.flavor == 'duckdb':
        use_pandas = True
    if use_pandas:
        try:
            return self.read(query, *args, **kw).iloc[0, 0]
        except Exception:
            return None

    ### NOTE: These must be popped (not just read) because they are passed explicitly below;
    ###       leaving them in `kw` would raise a duplicate-keyword `TypeError`,
    ###       which the `except` below would silently turn into `None`.
    _close = kw.pop('close', True)
    _commit = kw.pop('commit', (self.flavor != 'mssql'))
    ### The connection is always requested below so that the first row may be fetched.
    kw.pop('with_connection', None)

    try:
        ### Keep the connection open (`close=False`) until the first row has been read.
        result, connection = self.exec(
            query,
            *args,
            with_connection=True,
            close=False,
            commit=_commit,
            **kw
        )
        first = result.first() if result is not None else None
        _val = first[0] if first is not None else None
    except Exception as e:
        warn(e, stacklevel=3)
        return None
    if _close:
        try:
            connection.close()
        except Exception as e:
            warn("Failed to close connection with exception:\n" + str(e))
    return _val

Execute the provided query and return the first value.

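Parameters
  • query (str): The SQL query to execute.
  • *args (Any): The arguments passed to meerschaum.connectors.sql.SQLConnector.exec if use_pandas is False (default) or to meerschaum.connectors.sql.SQLConnector.read.
  • use_pandas (bool, default False): If True, use meerschaum.connectors.SQLConnector.read, otherwise use meerschaum.connectors.sql.SQLConnector.exec (default). NOTE: This is always True for DuckDB.
  • **kw (Any): See args.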
Returns
  • Any value returned from the query.
def exec( self, query: str, *args: Any, silent: bool = False, debug: bool = False, commit: Optional[bool] = None, close: Optional[bool] = None, with_connection: bool = False, _connection=None, _transaction=None, **kw: Any) -> 'Union[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.cursor.LegacyCursorResult, Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection], Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection], None]':
def exec(
    self,
    query: str,
    *args: Any,
    silent: bool = False,
    debug: bool = False,
    commit: Optional[bool] = None,
    close: Optional[bool] = None,
    with_connection: bool = False,
    _connection=None,
    _transaction=None,
    **kw: Any
) -> Union[
        sqlalchemy.engine.result.resultProxy,
        sqlalchemy.engine.cursor.LegacyCursorResult,
        Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection],
        Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection],
        None
]:
    """
    Run a SQL statement and hand back the `sqlalchemy` result
    (useful for e.g. stored procedures).

    Always use bind variables when inserting data to guard against SQL injection!

    Parameters
    ----------
    query: Union[str, List[str], Tuple[str]]
        The statement to run.
        A list or tuple is forwarded to `self.exec_queries()` instead.

    args: Any
        Positional arguments forwarded to the connection's `execute()`.

    silent: bool, default False
        If `True`, do not emit a warning when execution fails.

    debug: bool, default False
        If `True`, print the query and progress messages.

    commit: Optional[bool], default None
        If `True`, run inside a transaction and commit it afterwards.
        When omitted, commits unless the flavor is `'mssql'` and the query contains `select`.
        Ignored if `query` is a list of strings.

    close: Optional[bool], default None
        If `True`, close the connection once finished.
        When omitted, closes unless the flavor is `'mssql'`.
        Ignored if `query` is a list of strings.

    with_connection: bool, default False
        If `True`, return a `(result, connection)` tuple.
        Ignored if `query` is a list of strings.

    _connection, _transaction:
        Optionally reuse an existing connection and / or transaction.

    Returns
    -------
    The `sqlalchemy` result (`None` if execution failed),
    or a `(result, connection)` tuple if `with_connection` is `True`.

    """
    ### Several queries at once are delegated wholesale.
    if isinstance(query, (list, tuple)):
        queries = list(query)
        return self.exec_queries(queries, *args, silent=silent, debug=debug, **kw)

    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import("sqlalchemy", lazy=False)
    if debug:
        dprint(f"[{self}] Executing query:\n{query}")

    ### Resolve the defaults for closing and committing.
    if close is None:
        should_close = self.flavor != 'mssql'
    else:
        should_close = close

    if commit is None:
        should_commit = self.flavor != 'mssql' or 'select' not in str(query).lower()
    else:
        should_commit = commit

    ### Select and Insert objects need to be compiled (SQLAlchemy 2.0.0+),
    ### so only raw strings are wrapped in `text()`.
    if not hasattr(query, 'compile'):
        query = sqlalchemy.text(query)

    if _connection is None:
        connection = self.get_connection()
    else:
        connection = _connection

    try:
        if _transaction is not None:
            transaction = _transaction
        elif should_commit:
            transaction = connection.begin()
        else:
            transaction = None
    except sqlalchemy.exc.InvalidRequestError as e:
        ### Only rebuild the connection if the caller did not supply their own.
        caller_supplied = _connection is not None or _transaction is not None
        if caller_supplied:
            raise e
        connection = self.get_connection(rebuild=True)
        transaction = connection.begin()

    ### A provided transaction which is no longer active is replaced on a fresh connection.
    if transaction is not None and not transaction.is_active:
        if _transaction is not None:
            connection = self.get_connection(rebuild=True)
            if should_commit:
                transaction = connection.begin()
            else:
                transaction = None

    result = None
    try:
        result = connection.execute(query, *args, **kw)
        if should_commit:
            transaction.commit()
    except Exception as e:
        if debug:
            dprint(f"[{self}] Failed to execute query:\n\n{query}\n\n{e}")
        if not silent:
            warn(str(e), stacklevel=3)
        result = None
        if should_commit:
            if debug:
                dprint(f"[{self}] Rolling back failed transaction...")
            transaction.rollback()
            connection = self.get_connection(rebuild=True)
    finally:
        if should_close:
            connection.close()

    if debug:
        dprint(f"[{self}] Done executing.")

    if not with_connection:
        return result

    return result, connection

Execute SQL code and return the sqlalchemy result, e.g. when calling stored procedures.

If inserting data, please use bind variables to avoid SQL injection!

Parameters
  • query (Union[str, List[str], Tuple[str]]): The query to execute. If query is a list or tuple, call self.exec_queries() instead.
  • args (Any): Arguments passed to sqlalchemy.engine.execute.
  • silent (bool, default False): If True, suppress warnings.
  • commit (Optional[bool], default None): If True, commit the changes after execution. Causes issues with flavors like 'mssql'. This does not apply if query is a list of strings.
  • close (Optional[bool], default None): If True, close the connection after execution. Causes issues with flavors like 'mssql'. This does not apply if query is a list of strings.
  • with_connection (bool, default False): If True, return a tuple including the connection object. This does not apply if query is a list of strings.
Returns
  • The sqlalchemy result object, or a tuple with the connection if with_connection is provided.
def execute( self, *args: Any, **kw: Any) -> 'Optional[sqlalchemy.engine.result.resultProxy]':
def execute(
    self,
    *args : Any,
    **kw : Any
) -> Optional[sqlalchemy.engine.result.resultProxy]:
    """
    Run a query by delegating to `meerschaum.connectors.sql.SQLConnector.exec`.

    All positional and keyword arguments are forwarded unchanged,
    and whatever `exec` returns is returned as-is.
    """
    result = self.exec(*args, **kw)
    return result
def to_sql( self, df: pandas.DataFrame, name: str = None, index: bool = False, if_exists: str = 'replace', method: str = '', chunksize: Optional[int] = -1, schema: Optional[str] = None, safe_copy: bool = True, silent: bool = False, debug: bool = False, as_tuple: bool = False, as_dict: bool = False, _connection=None, _transaction=None, **kw) -> Union[bool, Tuple[bool, str]]:
 747def to_sql(
 748    self,
 749    df: pandas.DataFrame,
 750    name: str = None,
 751    index: bool = False,
 752    if_exists: str = 'replace',
 753    method: str = "",
 754    chunksize: Optional[int] = -1,
 755    schema: Optional[str] = None,
 756    safe_copy: bool = True,
 757    silent: bool = False,
 758    debug: bool = False,
 759    as_tuple: bool = False,
 760    as_dict: bool = False,
 761    _connection=None,
 762    _transaction=None,
 763    **kw
 764) -> Union[bool, SuccessTuple]:
 765    """
 766    Upload a DataFrame's contents to the SQL server.
 767
 768    Parameters
 769    ----------
 770    df: pd.DataFrame
 771        The DataFrame to be inserted.
 772
 773    name: str
 774        The name of the table to be created.
 775
 776    index: bool, default False
 777        If True, creates the DataFrame's indices as columns.
 778
 779    if_exists: str, default 'replace'
 780        Drop and create the table ('replace') or append if it exists
 781        ('append') or raise Exception ('fail').
 782        Options are ['replace', 'append', 'fail'].
 783
 784    method: str, default ''
 785        None or multi. Details on pandas.to_sql.
 786
 787    chunksize: Optional[int], default -1
 788        How many rows to insert at a time.
 789
 790    schema: Optional[str], default None
 791        Optionally override the schema for the table.
 792        Defaults to `SQLConnector.schema`.
 793
 794    safe_copy: bool, defaul True
 795        If `True`, copy the dataframe before making any changes.
 796
 797    as_tuple: bool, default False
 798        If `True`, return a (success_bool, message) tuple instead of a `bool`.
 799        Defaults to `False`.
 800
 801    as_dict: bool, default False
 802        If `True`, return a dictionary of transaction information.
 803        The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`,
 804        `method`, and `target`.
 805
 806    kw: Any
 807        Additional arguments will be passed to the DataFrame's `to_sql` function
 808
 809    Returns
 810    -------
 811    Either a `bool` or a `SuccessTuple` (depends on `as_tuple`).
 812    """
 813    import time
 814    import json
 815    from datetime import timedelta
 816    from meerschaum.utils.warnings import error, warn
 817    import warnings
 818    import functools
 819    import traceback
 820
 821    if name is None:
 822        error(f"Name must not be `None` to insert data into {self}.")
 823
 824    ### We're requiring `name` to be positional, and sometimes it's passed in from background jobs.
 825    kw.pop('name', None)
 826
 827    schema = schema or self.schema
 828
 829    from meerschaum.utils.sql import (
 830        sql_item_name,
 831        table_exists,
 832        json_flavors,
 833        truncate_item_name,
 834        DROP_IF_EXISTS_FLAVORS,
 835    )
 836    from meerschaum.utils.dataframe import (
 837        get_json_cols,
 838        get_numeric_cols,
 839        get_uuid_cols,
 840        get_bytes_cols,
 841        get_geometry_cols,
 842    )
 843    from meerschaum.utils.dtypes import (
 844        are_dtypes_equal,
 845        coerce_timezone,
 846        encode_bytes_for_bytea,
 847        serialize_bytes,
 848        serialize_decimal,
 849        serialize_geometry,
 850        json_serialize_value,
 851        get_geometry_type_srid,
 852    )
 853    from meerschaum.utils.dtypes.sql import (
 854        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
 855        get_db_type_from_pd_type,
 856        get_pd_type_from_db_type,
 857        get_numeric_precision_scale,
 858    )
 859    from meerschaum.utils.misc import interval_str
 860    from meerschaum.connectors.sql._create_engine import flavor_configs
 861    from meerschaum.utils.packages import attempt_import, import_pandas
 862    sqlalchemy = attempt_import('sqlalchemy', debug=debug, lazy=False)
 863    pd = import_pandas()
 864    is_dask = 'dask' in df.__module__
 865
 866    bytes_cols = get_bytes_cols(df)
 867    numeric_cols = get_numeric_cols(df)
 868    geometry_cols = get_geometry_cols(df)
 869    ### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite).
 870    numeric_cols_dtypes = {
 871        col: typ
 872        for col, typ in kw.get('dtype', {}).items()
 873        if (
 874            col in df.columns
 875            and 'numeric' in str(typ).lower()
 876        )
 877    }
 878    numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
 879    numeric_cols_precisions_scales = {
 880        col: (
 881            (typ.precision, typ.scale)
 882            if hasattr(typ, 'precision')
 883            else get_numeric_precision_scale(self.flavor)
 884        )
 885        for col, typ in numeric_cols_dtypes.items()
 886    }
 887    geometry_cols_dtypes = {
 888        col: typ
 889        for col, typ in kw.get('dtype', {}).items()
 890        if (
 891            col in df.columns
 892            and 'geometry' in str(typ).lower() or 'geography' in str(typ).lower()
 893        )
 894    }
 895    geometry_cols.extend([col for col in geometry_cols_dtypes if col not in geometry_cols])
 896    geometry_cols_types_srids = {
 897        col: (typ.geometry_type, typ.srid)
 898        if hasattr(typ, 'srid')
 899        else get_geometry_type_srid()
 900        for col, typ in geometry_cols_dtypes.items()
 901    }
 902
 903    cols_pd_types = {
 904        col: get_pd_type_from_db_type(str(typ))
 905        for col, typ in kw.get('dtype', {}).items()
 906    }
 907    cols_pd_types.update({
 908        col: f'numeric[{precision},{scale}]'
 909        for col, (precision, scale) in numeric_cols_precisions_scales.items()
 910        if precision and scale
 911    })
 912    cols_db_types = {
 913        col: get_db_type_from_pd_type(typ, flavor=self.flavor)
 914        for col, typ in cols_pd_types.items()
 915    }
 916
 917    enable_bulk_insert = mrsm.get_config(
 918        'system', 'connectors', 'sql', 'bulk_insert', self.flavor,
 919        warn=False,
 920    ) or False
 921    stats = {'target': name}
 922    ### resort to defaults if None
 923    copied = False
 924    use_bulk_insert = False
 925    if method == "":
 926        if enable_bulk_insert:
 927            method = (
 928                functools.partial(mssql_insert_json, cols_types=cols_db_types, debug=debug)
 929                if self.flavor == 'mssql'
 930                else functools.partial(psql_insert_copy, debug=debug)
 931            )
 932            use_bulk_insert = True
 933        else:
 934            ### Should resolve to 'multi' or `None`.
 935            method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
 936
 937    if bytes_cols and (use_bulk_insert or self.flavor == 'oracle'):
 938        if safe_copy and not copied:
 939            df = df.copy()
 940            copied = True
 941        bytes_serializer = (
 942            functools.partial(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
 943            if self.flavor != 'mssql'
 944            else serialize_bytes
 945        )
 946        for col in bytes_cols:
 947            df[col] = df[col].apply(bytes_serializer)
 948
 949    ### Check for numeric columns.
 950    for col in numeric_cols:
 951        precision, scale = numeric_cols_precisions_scales.get(
 952            col,
 953            get_numeric_precision_scale(self.flavor)
 954        )
 955        df[col] = df[col].apply(
 956            functools.partial(
 957                serialize_decimal,
 958                quantize=True,
 959                precision=precision,
 960                scale=scale,
 961            )
 962        )
 963
 964    geometry_format = 'wkt' if self.flavor == 'mssql' else (
 965        'gpkg_wkb'
 966        if self.flavor == 'geopackage'
 967        else 'wkb_hex'
 968    )
 969    for col in geometry_cols:
 970        geometry_type, srid = geometry_cols_types_srids.get(col, get_geometry_type_srid())
 971        with warnings.catch_warnings():
 972            warnings.simplefilter("ignore")
 973            df[col] = df[col].apply(
 974                functools.partial(
 975                    serialize_geometry,
 976                    geometry_format=geometry_format,
 977                )
 978            )
 979
 980    stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 981
 982    default_chunksize = self._sys_config.get('chunksize', None)
 983    chunksize = chunksize if chunksize != -1 else default_chunksize
 984    if chunksize is not None and self.flavor in _max_chunks_flavors:
 985        if chunksize > _max_chunks_flavors[self.flavor]:
 986            if chunksize != default_chunksize:
 987                warn(
 988                    f"The specified chunksize of {chunksize} exceeds the maximum of "
 989                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
 990                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
 991                    stacklevel = 3,
 992                )
 993            chunksize = _max_chunks_flavors[self.flavor]
 994    stats['chunksize'] = chunksize
 995
 996    success, msg = False, "Default to_sql message"
 997    start = time.perf_counter()
 998    if debug:
 999        msg = f"[{self}] Inserting {len(df)} rows with chunksize: {chunksize}..."
1000        print(msg, end="", flush=True)
1001    stats['num_rows'] = len(df)
1002
1003    ### Check if the name is too long.
1004    truncated_name = truncate_item_name(name, self.flavor)
1005    if name != truncated_name:
1006        if self.flavor not in ('oracle', 'mysql', 'mariadb'):
1007            warn(
1008                f"Table '{name}' is too long for '{self.flavor}',"
1009                f" will instead create the table '{truncated_name}'."
1010            )
1011
1012    ### filter out non-pandas args
1013    import inspect
1014    to_sql_params = inspect.signature(df.to_sql).parameters
1015    to_sql_kw = {}
1016    for k, v in kw.items():
1017        if k in to_sql_params:
1018            to_sql_kw[k] = v
1019
1020    to_sql_kw.update({
1021        'name': truncated_name,
1022        'schema': schema,
1023        ('con' if not is_dask else 'uri'): (self.engine if not is_dask else self.URI),
1024        'index': index,
1025        'if_exists': if_exists,
1026        'method': method,
1027        'chunksize': chunksize,
1028    })
1029    if is_dask:
1030        to_sql_kw.update({
1031            'parallel': True,
1032        })
1033    elif _connection is not None:
1034        to_sql_kw['con'] = _connection
1035
1036    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
1037    if self.flavor == 'oracle':
1038        ### For some reason 'replace' doesn't work properly in pandas,
1039        ### so try dropping first.
1040        if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug):
1041            success = self.exec(
1042                f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema)
1043            ) is not None
1044            if not success:
1045                warn(f"Unable to drop {name}")
1046
1047        ### Enforce NVARCHAR(2000) as text instead of CLOB.
1048        dtype = to_sql_kw.get('dtype', {})
1049        for col, typ in df.dtypes.items():
1050            if are_dtypes_equal(str(typ), 'object'):
1051                dtype[col] = sqlalchemy.types.NVARCHAR(2000)
1052            elif are_dtypes_equal(str(typ), 'int'):
1053                dtype[col] = sqlalchemy.types.INTEGER
1054        to_sql_kw['dtype'] = dtype
1055    elif self.flavor == 'duckdb':
1056        dtype = to_sql_kw.get('dtype', {})
1057        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
1058        for col in dt_cols:
1059            df[col] = coerce_timezone(df[col], strip_utc=False)
1060    elif self.flavor == 'mssql':
1061        dtype = to_sql_kw.get('dtype', {})
1062        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
1063        new_dtype = {}
1064        for col in dt_cols:
1065            if col in dtype:
1066                continue
1067            dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True)
1068            if col not in dtype:
1069                new_dtype[col] = dt_typ
1070
1071        dtype.update(new_dtype)
1072        to_sql_kw['dtype'] = dtype
1073
1074    ### Check for JSON columns.
1075    if self.flavor not in json_flavors:
1076        json_cols = get_json_cols(df)
1077        for col in json_cols:
1078            df[col] = df[col].apply(
1079                (
1080                    lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True)
1081                    if not isinstance(x, Hashable)
1082                    else x
1083                )
1084            )
1085
1086    if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid':
1087        uuid_cols = get_uuid_cols(df)
1088        for col in uuid_cols:
1089            df[col] = df[col].astype(str)
1090
1091    try:
1092        with warnings.catch_warnings():
1093            warnings.filterwarnings('ignore')
1094            df.to_sql(**to_sql_kw)
1095        success = True
1096    except Exception:
1097        if not silent:
1098            warn(traceback.format_exc())
1099        success, msg = False, traceback.format_exc()
1100
1101    end = time.perf_counter()
1102    if success:
1103        num_rows = len(df)
1104        msg = (
1105            f"It took {interval_str(timedelta(seconds=(end - start)))} "
1106            + f"to sync {num_rows:,} row"
1107            + ('s' if num_rows != 1 else '')
1108            + f" to {name}."
1109        )
1110    stats['start'] = start
1111    stats['end'] = end
1112    stats['duration'] = end - start
1113
1114    if debug:
1115        print(" done.", flush=True)
1116        dprint(msg)
1117
1118    stats['success'] = success
1119    stats['msg'] = msg
1120    if as_tuple:
1121        return success, msg
1122    if as_dict:
1123        return stats
1124    return success

Upload a DataFrame's contents to the SQL server.

Parameters
  • df (pd.DataFrame): The DataFrame to be inserted.
  • name (str): The name of the table to be created.
  • index (bool, default False): If True, creates the DataFrame's indices as columns.
  • if_exists (str, default 'replace'): Drop and create the table ('replace') or append if it exists ('append') or raise Exception ('fail'). Options are ['replace', 'append', 'fail'].
  • method (str, default ''): None or multi. Details on pandas.to_sql.
  • chunksize (Optional[int], default -1): How many rows to insert at a time.
  • schema (Optional[str], default None): Optionally override the schema for the table. Defaults to SQLConnector.schema.
  • safe_copy (bool, default True): If True, copy the dataframe before making any changes.
  • as_tuple (bool, default False): If True, return a (success_bool, message) tuple instead of a bool. Defaults to False.
  • as_dict (bool, default False): If True, return a dictionary of transaction information. The keys are success, msg, start, end, duration, num_rows, chunksize, method, and target.
  • kw (Any): Additional arguments will be passed to the DataFrame's to_sql function
Returns
  • Either a bool or a SuccessTuple (depends on as_tuple).
def exec_queries( self, queries: "List[Union[str, Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]]]", break_on_error: bool = False, rollback: bool = True, silent: bool = False, debug: bool = False) -> 'List[Union[sqlalchemy.engine.cursor.CursorResult, None]]':
def exec_queries(
    self,
    queries: List[
        Union[
            str,
            Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
        ]
    ],
    break_on_error: bool = False,
    rollback: bool = True,
    silent: bool = False,
    debug: bool = False,
) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
    """
    Execute a list of queries in a single transaction.

    Parameters
    ----------
    queries: List[Union[str, Tuple[str, Callable[[Session], List[str]]]]]
        The queries in the transaction to be executed.
        If a query is a tuple, the second item of the tuple
        will be considered a callable hook which receives the session
        and returns a list of queries to be executed
        before the next item in the list.
        Hook queries are run through a nested call to `exec_queries()`.

    break_on_error: bool, default False
        If `True`, stop executing when a query fails.

    rollback: bool, default True
        If `break_on_error` is `True`, rollback the transaction if a query fails.

    silent: bool, default False
        If `True`, suppress warnings.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of SQLAlchemy results.
    A failed query contributes `None`, and a query whose hook returned queries
    contributes a `(result, hook_results)` tuple.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    sqlalchemy, sqlalchemy_orm = attempt_import('sqlalchemy', 'sqlalchemy.orm', lazy=False)
    session = sqlalchemy_orm.Session(self.engine)

    results = []
    with session.begin():
        for item in queries:
            ### Split `(query, hook)` tuples; plain queries have no hook.
            if isinstance(item, tuple):
                statement, hook = item
            else:
                statement, hook = item, None

            if isinstance(statement, str):
                statement = sqlalchemy.text(statement)

            if debug:
                dprint(f"[{self}]\n" + str(statement))

            outcome = None
            try:
                outcome = session.execute(statement)
                session.flush()
            except Exception as e:
                msg = (f"Encountered error while executing:\n{e}")
                if not silent:
                    warn(msg)
                elif debug:
                    dprint(f"[{self}]\n" + str(msg))
                ### A failure during the flush also counts as a failed query.
                outcome = None

            if debug:
                dprint(f"[{self}] Finished executing.")

            failed = outcome is None
            if failed and break_on_error:
                if rollback:
                    if debug:
                        dprint(f"[{self}] Rolling back...")
                    session.rollback()
                results.append(outcome)
                break

            if not failed and hook is not None:
                hook_queries = hook(session)
                if hook_queries:
                    hook_results = self.exec_queries(
                        hook_queries,
                        break_on_error=break_on_error,
                        rollback=rollback,
                        silent=silent,
                        debug=debug,
                    )
                    outcome = (outcome, hook_results)

            results.append(outcome)

    return results

Execute a list of queries in a single transaction.

Parameters
  • queries (List[Union[str, Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]]]): The queries in the transaction to be executed. If a query is a tuple, the second item of the tuple will be considered a callable hook that returns a list of queries to be executed before the next item in the list.
  • break_on_error (bool, default False): If True, stop executing when a query fails.
  • rollback (bool, default True): If break_on_error is True, rollback the transaction if a query fails.
  • silent (bool, default False): If True, suppress warnings.
Returns
  • A list of SQLAlchemy results.
def get_connection(self, rebuild: bool = False) -> "'sqlalchemy.engine.base.Connection'":
def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
    """
    Return the calling thread's open connection, creating one if needed.

    Connections are kept in `self.__dict__['_thread_connections']`,
    keyed by the current thread's identifier.

    Parameters
    ----------
    rebuild: bool, default False
        If `True`, close the previous connection and open a new one.

    Returns
    -------
    A `sqlalchemy.engine.base.Connection` object.
    """
    import threading

    self.__dict__.setdefault('_thread_connections', {})
    self._cleanup_connections()

    ### Look the mapping up again only after the cleanup has run.
    per_thread = self.__dict__.get('_thread_connections', {})
    ident = threading.get_ident()
    current = per_thread.get(ident)

    if rebuild and current is not None:
        ### Closing is best-effort; the stale entry is discarded either way.
        try:
            current.close()
        except Exception:
            pass

        per_thread.pop(ident, None)
        current = None

    if current is None or current.closed:
        current = self.engine.connect()
        per_thread[ident] = current

    return current

Return the current alive connection.

Parameters
  • rebuild (bool, default False): If True, close the previous connection and open a new one.
Returns
  • A sqlalchemy.engine.base.Connection object.
def test_connection(self, **kw: Any) -> Optional[bool]:
def test_connection(
    self,
    **kw: Any
) -> Union[bool, None]:
    """
    Test if a successful connection to the database may be made.

    Parameters
    ----------
    kw: Any
        The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
        By default, a single attempt is made with no wait and without warnings.

    Returns
    -------
    `True` if a connection is made, otherwise `False` or `None` in case of failure.

    """
    import warnings
    from meerschaum.connectors.poll import retry_connect

    ### Caller-supplied keywords take precedence over the defaults.
    connect_kw = {
        'max_retries': 1,
        'retry_wait': 0,
        'warn': False,
        'connector': self,
        **kw,
    }
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'Could not')
        try:
            outcome = retry_connect(**connect_kw)
        except Exception:
            ### Any error while connecting is reported as a failed test.
            outcome = False
    return outcome

Test if a successful connection to the database may be made.

Parameters
Returns
  • True if a connection is made, otherwise False or None in case of failure.
def fetch( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, str, NoneType] = '', end: Union[datetime.datetime, int, str, NoneType] = None, check_existing: bool = True, chunksize: Optional[int] = -1, workers: Optional[int] = None, debug: bool = False, **kw: Any) -> "Union['pd.DataFrame', List[Any], None]":
def fetch(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, str, None] = '',
    end: Union[datetime, int, str, None] = None,
    check_existing: bool = True,
    chunksize: Optional[int] = -1,
    workers: Optional[int] = None,
    debug: bool = False,
    **kw: Any
) -> Union['pd.DataFrame', List[Any], None]:
    """Build the pipe's fetch query and read its results as an iterator of chunks.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe object which contains the `fetch` metadata.

        - pipe.columns['datetime']: str
            - Name of the datetime column for the remote table.
        - pipe.parameters['fetch']: Dict[str, Any]
            - Parameters necessary to execute a query.
        - pipe.parameters['fetch']['definition']: str
            - Raw SQL query to execute to generate the pandas DataFrame.
        - pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
            - How many minutes before `begin` to search for data (*optional*).

    begin: Union[datetime, int, str, None], default ''
        Most recent datetime to search for data.
        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.

    end: Union[datetime, int, str, None], default None
        The latest datetime to search for data.
        If `end` is `None`, do not apply an upper bound.

    check_existing: bool, default True
        If `False`, use a backtrack interval of 0 minutes.

    chunksize: Optional[int], default -1
        How many rows to load into memory at once.
        Otherwise the entire result set is loaded into memory.

    workers: Optional[int], default None
        How many threads to use when consuming the generator.
        Defaults to the number of cores.

    debug: bool, default False
        Verbosity toggle.

    kw: Any
        Additional keyword arguments are forwarded to `get_pipe_metadef()`.

    Returns
    -------
    The result of `self.read()` called with `as_iterator=True`.
    """
    ### First resolve the query text, then hand it to `read()`.
    metadef_kwargs = dict(
        begin=begin,
        end=end,
        check_existing=check_existing,
        debug=debug,
    )
    query = self.get_pipe_metadef(pipe, **metadef_kwargs, **kw)
    return self.read(
        query,
        chunksize=chunksize,
        workers=workers,
        as_iterator=True,
        debug=debug,
    )

Execute the SQL definition and return a Pandas DataFrame.

Parameters
  • pipe (mrsm.Pipe): The pipe object which contains the fetch metadata.

    • pipe.columns['datetime']: str
      • Name of the datetime column for the remote table.
    • pipe.parameters['fetch']: Dict[str, Any]
      • Parameters necessary to execute a query.
    • pipe.parameters['fetch']['definition']: str
      • Raw SQL query to execute to generate the pandas DataFrame.
    • pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
      • How many minutes before begin to search for data (optional).
  • begin (Union[datetime, int, str, None], default ''): Most recent datetime to search for data. If backtrack_minutes is provided, subtract backtrack_minutes.
  • end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If end is None, do not apply an upper bound.
  • check_existing (bool, default True): If False, use a backtrack interval of 0 minutes.
  • chunksize (Optional[int], default -1): How many rows to load into memory at once. Otherwise the entire result set is loaded into memory.
  • workers (Optional[int], default None): How many threads to use when consuming the generator. Defaults to the number of cores.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A pandas DataFrame generator.
def get_pipe_metadef( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, begin: Union[datetime.datetime, int, str, NoneType] = '', end: Union[datetime.datetime, int, str, NoneType] = None, check_existing: bool = True, debug: bool = False, **kw: Any) -> Optional[str]:
 89def get_pipe_metadef(
 90    self,
 91    pipe: mrsm.Pipe,
 92    params: Optional[Dict[str, Any]] = None,
 93    begin: Union[datetime, int, str, None] = '',
 94    end: Union[datetime, int, str, None] = None,
 95    check_existing: bool = True,
 96    debug: bool = False,
 97    **kw: Any
 98) -> Union[str, None]:
 99    """
100    Return a pipe's meta definition fetch query.
101
102    params: Optional[Dict[str, Any]], default None
103        Optional params dictionary to build the `WHERE` clause.
104        See `meerschaum.utils.sql.build_where`.
105
106    begin: Union[datetime, int, str, None], default None
107        Most recent datatime to search for data.
108        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
109
110    end: Union[datetime, int, str, None], default None
111        The latest datetime to search for data.
112        If `end` is `None`, do not bound 
113
114    check_existing: bool, default True
115        If `True`, apply the backtrack interval.
116
117    debug: bool, default False
118        Verbosity toggle.
119
120    Returns
121    -------
122    A pipe's meta definition fetch query string.
123    """
124    from meerschaum.utils.warnings import warn
125    from meerschaum.utils.sql import (
126        sql_item_name,
127        dateadd_str,
128        build_where,
129        wrap_query_with_cte,
130        format_cte_subquery,
131    )
132    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
133    from meerschaum.config import get_config
134    from meerschaum.utils.dtypes import (
135        get_current_timestamp,
136        MRSM_PRECISION_UNITS_SCALARS,
137        MRSM_PRECISION_UNITS_ALIASES,
138    )
139
140    parent = pipe.parent
141    parent_dt_col = parent.columns.get('datetime', None) if parent is not None else None
142    parent_dt_typ = parent.dtypes.get(parent_dt_col, 'datetime') if parent_dt_col else None
143    dt_col = parent_dt_col or pipe.columns.get('datetime', None)
144    dt_typ = parent_dt_typ or pipe.dtypes.get(dt_col, 'datetime')
145    db_dt_typ = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
146    precision = parent.precision if parent_dt_typ else pipe.precision
147    if not dt_col:
148        dt_col = pipe.guess_datetime()
149        dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
150        is_guess = True
151    else:
152        dt_name = sql_item_name(dt_col, self.flavor, None)
153        is_guess = False
154
155    if begin not in (None, '') or end is not None:
156        if is_guess:
157            if dt_col is None:
158                warn(
159                    f"Unable to determine a datetime column for {pipe}."
160                    + "\n    Ignoring begin and end...",
161                    stack=False,
162                )
163                begin, end = '', None
164            else:
165                warn(
166                    f"A datetime wasn't specified for {pipe}.\n"
167                    + f"    Using column \"{dt_col}\" for datetime bounds...",
168                    stack=False
169                )
170
171    apply_backtrack = begin == '' and check_existing
172    backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug)
173    btm = (
174        int(backtrack_interval.total_seconds() / 60)
175        if isinstance(backtrack_interval, timedelta)
176        else backtrack_interval
177    )
178    begin = (
179        pipe.get_sync_time(debug=debug)
180        if begin == ''
181        else begin
182    )
183
184    if 'int' in dt_typ.lower():
185        precision_unit = precision.get('unit', 'second')
186        if isinstance(begin, datetime):
187            begin = get_current_timestamp(precision_unit, _now=begin, as_int=True)
188        if isinstance(end, datetime):
189            end = get_current_timestamp(precision_unit, _now=end, as_int=True)
190
191        if isinstance(backtrack_interval, timedelta):
192            true_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
193            btm = int(backtrack_interval.total_seconds() * MRSM_PRECISION_UNITS_SCALARS[true_unit])
194
195    if begin not in (None, '') and end is not None and begin >= end:
196        begin = None
197
198    begin_da, end_da = None, None
199    if dt_name:
200        begin_da = (
201            dateadd_str(
202                flavor=self.flavor,
203                datepart=('minute' if not isinstance(begin, int) else None),
204                number=((-1 * btm) if apply_backtrack else 0),
205                begin=begin,
206                db_type=db_dt_typ,
207            )
208            if begin not in ('', None)
209            else None
210        )
211        end_da = (
212            dateadd_str(
213                flavor=self.flavor,
214                datepart=('minute' if not isinstance(end, int) else None),
215                number=0,
216                begin=end,
217                db_type=db_dt_typ,
218            )
219            if end is not None
220            else None
221        )
222
223    definition_name = sql_item_name('definition', self.flavor, None)
224    definition = get_pipe_query(pipe)
225    if definition is None:
226        raise ValueError(f"No SQL definition could be found for {pipe}.")
227
228    ### Attempt to push down the predicate if possible.
229    handled_bounding = False
230    if parent_dt_col and (begin not in (None, '') or end is not None):
231        parent_target = parent.target
232        parent_schema = (
233            parent.instance_connector.get_pipe_schema(parent)
234            if parent.instance_connector.type == 'sql'
235            else None
236        )
237        parent_dt_name = sql_item_name(parent_dt_col, self.flavor, None)
238        parent_db_dt_typ = get_db_type_from_pd_type(parent_dt_typ, self.flavor)
239
240        p_begin, p_end, p_btm = begin, end, btm
241        if 'int' in parent_dt_typ.lower():
242            p_precision_unit = precision.get('unit', 'second')
243            if isinstance(begin, (datetime, int)):
244                _dt_begin = (
245                    begin
246                    if isinstance(begin, datetime)
247                    else datetime.fromtimestamp(
248                        begin / MRSM_PRECISION_UNITS_SCALARS[
249                            MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
250                        ],
251                        timezone.utc
252                    )
253                )
254                p_begin = get_current_timestamp(p_precision_unit, _now=_dt_begin, as_int=True)
255            
256            if isinstance(end, (datetime, int)) and end is not None:
257                _dt_end = (
258                    end
259                    if isinstance(end, datetime)
260                    else datetime.fromtimestamp(
261                        end / MRSM_PRECISION_UNITS_SCALARS[
262                            MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
263                        ], 
264                        timezone.utc
265                    )
266                )
267                p_end = get_current_timestamp(p_precision_unit, _now=_dt_end, as_int=True)
268
269            if isinstance(backtrack_interval, timedelta):
270                p_true_unit = MRSM_PRECISION_UNITS_ALIASES.get(p_precision_unit, p_precision_unit)
271                p_btm = int(
272                    backtrack_interval.total_seconds()
273                    * MRSM_PRECISION_UNITS_SCALARS[p_true_unit]
274                )
275
276        parent_begin_da = (
277            dateadd_str(
278                flavor=self.flavor,
279                datepart='minute',
280                number=((-1 * p_btm) if apply_backtrack else 0),
281                begin=p_begin,
282                db_type=parent_db_dt_typ,
283            )
284            if p_begin not in ('', None)
285            else None
286        )
287        parent_end_da = (
288            dateadd_str(
289                flavor=self.flavor,
290                datepart='minute',
291                number=0,
292                begin=p_end,
293                db_type=parent_db_dt_typ,
294            )
295            if p_end is not None
296            else None
297        )
298
299        parent_item_name = sql_item_name(parent_target, self.flavor, None)
300        parent_item_name_full = sql_item_name(parent_target, self.flavor, parent_schema)
301
302        # Simple string search for parent target in the original definition.
303        if parent_dt_name and (
304            parent_item_name in definition 
305            or parent_item_name_full in definition
306            or parent_target in definition
307        ):
308            pushdown_cte_name = sql_item_name('_mrsm_pushdown', self.flavor, None)
309            pushdown_where = ""
310            if parent_begin_da:
311                pushdown_where += f"\n    {parent_dt_name} >= {parent_begin_da}"
312            if parent_begin_da and parent_end_da:
313                pushdown_where += "\n    AND"
314            if parent_end_da:
315                pushdown_where += f"\n    {parent_dt_name} < {parent_end_da}"
316            
317            pushdown_query = (
318                f"SELECT *\nFROM {parent_item_name_full}"
319                + f"\nWHERE {pushdown_where}"
320            )
321            
322            # Replace occurrences of parent target with pushdown CTE in the definition body.
323            parent_found = False
324            patterns_to_replace = [
325                parent_item_name_full,
326                parent_item_name,
327                parent_target,
328            ]
329
330            new_definition_body = definition
331            for pattern in patterns_to_replace:
332                if pattern in new_definition_body:
333                    new_definition_body = new_definition_body.replace(pattern, pushdown_cte_name)
334                    parent_found = True
335            
336            if parent_found:
337                definition = wrap_query_with_cte(
338                    pushdown_query,
339                    new_definition_body,
340                    self.flavor,
341                    cte_name='_mrsm_pushdown',
342                )
343                handled_bounding = True
344
345    meta_def = (
346        format_cte_subquery(definition, self.flavor, 'definition') if (
347            (not (pipe.columns or {}).get('id', None))
348            or (not get_config('system', 'experimental', 'join_fetch'))
349        ) else _join_fetch_query(pipe, self.flavor, debug=debug, **kw)
350    )
351
352    has_where = 'where' in meta_def.lower()[meta_def.lower().rfind('definition'):]
353    if dt_name and (begin_da or end_da) and not handled_bounding:
354        definition_dt_name = f"{definition_name}.{dt_name}"
355        meta_def += "\n" + ("AND" if has_where else "WHERE") + " "
356        has_where = True
357        if begin_da:
358            meta_def += f"\n    {definition_dt_name}\n    >=\n    {begin_da}\n"
359        if begin_da and end_da:
360            meta_def += "    AND"
361        if end_da:
362            meta_def += f"\n    {definition_dt_name}\n    <\n    {end_da}\n"
363
364    if params is not None:
365        params_where = build_where(params, self, with_where=False)
366        meta_def += "\n    " + ("AND" if has_where else "WHERE") + "    "
367        has_where = True
368        meta_def += params_where
369
370    return meta_def.rstrip()

Return a pipe's meta definition fetch query.

params: Optional[Dict[str, Any]], default None Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.

begin: Union[datetime, int, str, None], default '' Most recent datetime to search for data. If '' (the default), the pipe's sync time is used. If backtrack_minutes is provided, subtract backtrack_minutes.

end: Union[datetime, int, str, None], default None The latest datetime to search for data. If end is None, do not bound the search on the upper end.

check_existing: bool, default True If True, apply the backtrack interval.

debug: bool, default False Verbosity toggle.

Returns
  • A pipe's meta definition fetch query string.
def cli(self, debug: bool = False) -> Tuple[bool, str]:
def cli(
    self,
    debug: bool = False,
) -> SuccessTuple:
    """
    Start an interactive CLI session for this connector in a child process.

    The connector's `meta` is serialized to JSON and handed to the subprocess through
    the `MRSM_SQL_<LABEL>` environment variable; the subprocess then fetches the
    connector by its label and calls its `_cli_exit()` method.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle. If `True`, the generated script is printed before it is run.

    Returns
    -------
    A `SuccessTuple`: `(True, "Success")`, or `(False, <message>)` if the subprocess raised.
    """
    from meerschaum.utils.warnings import dprint
    from meerschaum.utils.venv import venv_exec

    ### Touch the engine up front so that its dependencies are resolved.
    _ = self.engine

    subprocess_env = copy.deepcopy(dict(os.environ))
    connector_env_key = f"MRSM_SQL_{self.label.upper()}"
    connector_env_val = json.dumps(self.meta)
    subprocess_env[connector_env_key] = connector_env_val

    script_lines = [
        "import sys",
        "import meerschaum as mrsm",
        "import os",
        f"conn = mrsm.get_connector('sql:{self.label}')",
        "success, msg = conn._cli_exit()",
        "mrsm.pprint((success, msg))",
        "if not success:",
        "    raise Exception(msg)",
    ]
    cli_code = "\n".join(script_lines)

    if debug:
        dprint(cli_code)

    try:
        venv_exec(cli_code, venv=None, env=subprocess_env, debug=debug, capture_output=False)
    except Exception as e:
        return False, f"[{self}] Failed to start CLI:\n{e}"

    return True, "Success"

Launch a subprocess for an interactive CLI.

def get_pipe_size( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Optional[int]:
def get_pipe_size(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[int, None]:
    """
    Return the on-disk size of a pipe's target table in bytes.

    For TimescaleDB hypertables, the total hypertable size (including chunks and indexes)
    is returned. Other flavors use their native size functions where available.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table size to measure.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    An `int` of the number of bytes occupied by the target table,
    or `None` if the size could not be determined
    (the pipe does not exist, the flavor has no size query, or the query failed).
    """
    from meerschaum.utils.sql import sql_item_name, hypertable_queries

    if not pipe.exists(debug=debug):
        return None

    flavor = self.flavor
    schema = self.get_pipe_schema(pipe)
    pipe_name = sql_item_name(pipe.target, flavor, schema)
    ### Several queries embed names inside single-quoted SQL string literals,
    ### so any single quotes must be doubled.
    pipe_name_literal = pipe_name.replace("'", "''")
    target_literal = pipe.target.replace("'", "''")

    def _value(query: str) -> Union[int, None]:
        ### Best-effort: any failure (missing function, permissions, etc.) yields `None`.
        try:
            result = self.value(query, silent=True, debug=debug)
            return int(result) if result is not None else None
        except Exception:
            return None

    ### TimescaleDB / Citus expose dedicated size functions for distributed tables.
    if flavor in hypertable_queries:
        size = _value(hypertable_queries[flavor].format(table_name=pipe_name))
        if size is not None:
            return size

    if flavor in ('timescaledb', 'timescaledb-ha', 'postgresql', 'postgis', 'citus'):
        ### `pg_partition_tree` sums the parent plus every child partition (a partitioned parent
        ### holds no rows itself); it returns the single relation for non-partitioned tables.
        size = _value(
            "SELECT SUM(pg_total_relation_size(relid))\n"
            f"FROM pg_partition_tree('{pipe_name_literal}')"
        )
        if size is not None:
            return size
        return _value(f"SELECT pg_total_relation_size('{pipe_name_literal}')")

    if flavor == 'cockroachdb':
        return _value(f"SELECT pg_total_relation_size('{pipe_name_literal}')")

    if flavor in ('mysql', 'mariadb'):
        ### A MySQL/MariaDB "schema" is a database; honor a pipe's configured schema so the size
        ### lookup matches the database the table actually lives in.
        db_name = (
            schema
            or self.database
            or self.parse_uri(self.URI).get('database', None)
        )
        if not db_name:
            return None
        clean_db = db_name.replace("'", "''")
        return _value(
            "SELECT data_length + index_length\n"
            "FROM information_schema.tables\n"
            f"WHERE table_schema = '{clean_db}' AND table_name = '{target_literal}'"
        )

    if flavor == 'mssql':
        return _value(
            "SELECT SUM(reserved_page_count) * 8192\n"
            "FROM sys.dm_db_partition_stats\n"
            f"WHERE object_id = OBJECT_ID('{pipe_name_literal}')"
        )

    if flavor in ('sqlite', 'geopackage'):
        ### `dbstat` is only available when SQLite is compiled with SQLITE_ENABLE_DBSTAT_VTAB.
        return _value(f"SELECT SUM(pgsize) FROM dbstat WHERE name = '{target_literal}'")

    ### duckdb, oracle, and unknown flavors have no portable per-table size query.
    return None

Return the on-disk size of a pipe's target table in bytes.

For TimescaleDB hypertables, the total hypertable size (including chunks and indexes) is returned. Other flavors use their native size functions where available.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table size to measure.
  • debug (bool, default False): Verbosity toggle.
Returns
  • An int of the number of bytes occupied by the target table,
  • or None if the size could not be determined.
def compress_pipe( self, pipe: meerschaum.Pipe, no_policy: bool = False, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def compress_pipe(
    self,
    pipe: mrsm.Pipe,
    no_policy: bool = False,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Compress a pipe's target table to reduce disk usage.

    For TimescaleDB, enables the Hypercore columnstore, installs a columnstore (compression)
    policy (so future synced chunks are converted automatically), and converts any existing
    uncompressed chunks now. For MySQL/MariaDB and MSSQL, applies the flavor's native table
    compression. Other flavors are unsupported.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to compress.

    no_policy: bool, default False
        If `True` (TimescaleDB only), compress existing chunks now without installing an ongoing
        columnstore (compression) policy. Any pre-existing policy is left untouched.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success, including the amount of disk reclaimed.
    The message is empty if the size before or after could not be determined.
    """
    from meerschaum.utils.sql import sql_item_name
    from meerschaum.utils.formatting import format_bytes

    if not pipe.exists(debug=debug):
        return False, f"{pipe} does not exist; nothing to compress."

    flavor = self.flavor
    if flavor not in COMPRESSIBLE_FLAVORS:
        return False, f"Compression is not supported for flavor '{flavor}'."

    pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
    size_before = pipe.get_size(debug=debug)

    ### Each group is run in its own transaction. TimescaleDB requires enabling the columnstore
    ### and adding its policy in separate transactions (see timescale/timescaledb#8600).
    query_groups: List[List[str]] = []
    if flavor in ('timescaledb', 'timescaledb-ha'):
        if not self._is_hypertable(pipe, debug=debug):
            return False, _not_a_hypertable_message(pipe)
        ### 1. Enable the columnstore (required before any chunk can be converted).
        query_groups.append([self._get_columnstore_settings_query(pipe)])
        ### 2. Install a policy for ongoing conversion — re-create it so the configured `after`
        ### wins over any existing (e.g. auto-created) policy. Skipped entirely with `no_policy`,
        ### which compresses existing chunks now but leaves any pre-existing policy untouched.
        if not no_policy:
            query_groups.append([self._get_columnstore_remove_policy_query(pipe)])
            query_groups.append([self._get_columnstore_policy_query(pipe)])
        ### 3. Convert any existing uncompressed chunks now. `compress_chunk` is the still-supported
        ### function form of `convert_to_columnstore` (transaction-safe, unlike the `CALL` form).
        ### The table name is embedded in a string literal, so single quotes must be doubled.
        pipe_name_literal = pipe_name.replace("'", "''")
        query_groups.append([
            "SELECT compress_chunk(c, if_not_compressed => true) "
            f"FROM show_chunks('{pipe_name_literal}') c"
        ])
    elif flavor in ('mysql', 'mariadb'):
        query_groups.append([f"ALTER TABLE {pipe_name} ROW_FORMAT=COMPRESSED"])
    elif flavor == 'mssql':
        query_groups.append([
            f"ALTER TABLE {pipe_name} REBUILD PARTITION = ALL "
            "WITH (DATA_COMPRESSION = PAGE)"
        ])

    try:
        ### The outer `all()` consumes a generator, so later groups are skipped
        ### as soon as one group fails.
        success = all(
            all(self.exec_queries(
                group, break_on_error=True, rollback=True, silent=(not debug), debug=debug,
            ))
            for group in query_groups
        )
    except Exception as e:
        return False, f"Failed to compress {pipe}:\n{e}"

    if not success:
        return False, f"Failed to compress {pipe}."

    pipe._clear_cache_key('_exists', debug=debug)
    size_after = pipe.get_size(debug=debug)

    reclaimed_msg = ""
    if size_before is not None and size_after is not None:
        reclaimed = size_before - size_after
        change_str = f"{format_bytes(size_before)} to {format_bytes(size_after)}"
        if reclaimed > 0:
            reclaimed_msg = f"Reclaimed {format_bytes(reclaimed)} ({change_str})."
        elif reclaimed < 0:
            ### On small tables, compression overhead can exceed the savings.
            reclaimed_msg = f"Size grew by {format_bytes(-reclaimed)} ({change_str})."
        else:
            reclaimed_msg = f"Size unchanged ({format_bytes(size_before)})."

    return True, reclaimed_msg

Compress a pipe's target table to reduce disk usage.

For TimescaleDB, enables the Hypercore columnstore, installs a columnstore (compression) policy (so future synced chunks are converted automatically), and converts any existing uncompressed chunks now. For MySQL/MariaDB and MSSQL, applies the flavor's native table compression. Other flavors are unsupported.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to compress.
  • no_policy (bool, default False): If True (TimescaleDB only), compress existing chunks now without installing an ongoing columnstore (compression) policy. Any pre-existing policy is left untouched.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success, including the amount of disk reclaimed.
def decompress_pipe( self, pipe: meerschaum.Pipe, no_policy: bool = False, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def decompress_pipe(
    self,
    pipe: mrsm.Pipe,
    no_policy: bool = False,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Decompress a pipe's target table, the inverse of `compress_pipe()`.

    For TimescaleDB, removes the columnstore (compression) policy, converts every compressed
    chunk back to row-store, and disables the columnstore so future synced chunks stay
    uncompressed. For MySQL/MariaDB and MSSQL, reverts the flavor's native table compression.
    Other flavors are unsupported.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to decompress.

    no_policy: bool, default False
        If `True` (TimescaleDB only), decompress existing chunks now but leave the columnstore
        (compression) policy in place — chunks will be recompressed on the policy's schedule.
        Useful to temporarily decompress for a bulk backfill without disabling compression.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success, including the change in disk size.
    The message is empty if the size before or after could not be determined.
    """
    from meerschaum.utils.sql import sql_item_name
    from meerschaum.utils.formatting import format_bytes

    if not pipe.exists(debug=debug):
        return False, f"{pipe} does not exist; nothing to decompress."

    flavor = self.flavor
    if flavor not in COMPRESSIBLE_FLAVORS:
        return False, f"Decompression is not supported for flavor '{flavor}'."

    pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
    size_before = pipe.get_size(debug=debug)

    ### Each group is run in its own transaction.
    query_groups: List[List[str]] = []
    if flavor in ('timescaledb', 'timescaledb-ha'):
        if not self._is_hypertable(pipe, debug=debug):
            return False, _not_a_hypertable_message(pipe)
        ### 1. Remove the ongoing policy so chunks aren't recompressed. Skipped with `no_policy`,
        ### which decompresses existing chunks now but leaves the policy (e.g. for a backfill).
        if not no_policy:
            query_groups.append([self._get_columnstore_remove_policy_query(pipe)])
        ### 2. Convert every compressed chunk back to row-store. `decompress_chunk` is the function
        ### form (transaction-safe, unlike the `CALL` form of `convert_to_rowstore`).
        ### The table name is embedded in a string literal, so single quotes must be doubled.
        pipe_name_literal = pipe_name.replace("'", "''")
        query_groups.append([
            "SELECT decompress_chunk(c, if_compressed => true) "
            f"FROM show_chunks('{pipe_name_literal}') c"
        ])
        ### 3. Disable the columnstore so future synced chunks stay uncompressed. Only valid once
        ### no compressed chunks remain, and only sensible when the policy is also gone.
        if not no_policy:
            query_groups.append([self._get_columnstore_disable_query(pipe)])
    elif flavor in ('mysql', 'mariadb'):
        query_groups.append([f"ALTER TABLE {pipe_name} ROW_FORMAT=DYNAMIC"])
    elif flavor == 'mssql':
        query_groups.append([
            f"ALTER TABLE {pipe_name} REBUILD PARTITION = ALL "
            "WITH (DATA_COMPRESSION = NONE)"
        ])

    try:
        ### The outer `all()` consumes a generator, so later groups are skipped
        ### as soon as one group fails.
        success = all(
            all(self.exec_queries(
                group, break_on_error=True, rollback=True, silent=(not debug), debug=debug,
            ))
            for group in query_groups
        )
    except Exception as e:
        return False, f"Failed to decompress {pipe}:\n{e}"

    if not success:
        return False, f"Failed to decompress {pipe}."

    pipe._clear_cache_key('_exists', debug=debug)
    size_after = pipe.get_size(debug=debug)

    change_msg = ""
    if size_before is not None and size_after is not None:
        added = size_after - size_before
        change_str = f"{format_bytes(size_before)} to {format_bytes(size_after)}"
        if added > 0:
            change_msg = f"Expanded by {format_bytes(added)} ({change_str})."
        elif added < 0:
            change_msg = f"Shrank by {format_bytes(-added)} ({change_str})."
        else:
            change_msg = f"Size unchanged ({format_bytes(size_before)})."

    return True, change_msg

Decompress a pipe's target table, the inverse of compress_pipe().

For TimescaleDB, removes the columnstore (compression) policy, converts every compressed chunk back to row-store, and disables the columnstore so future synced chunks stay uncompressed. For MySQL/MariaDB and MSSQL, reverts the flavor's native table compression. Other flavors are unsupported.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to decompress.
  • no_policy (bool, default False): If True (TimescaleDB only), decompress existing chunks now but leave the columnstore (compression) policy in place — chunks will be recompressed on the policy's schedule. Useful to temporarily decompress for a bulk backfill without disabling compression.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success, including the change in disk size.
def apply_compression_policy( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def apply_compression_policy(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Idempotently enable compression and install a compression policy for a pipe.

    Intended to be called automatically (e.g. after a sync) when the pipe's `compress`
    parameter is set. Only TimescaleDB hypertables are affected; all other flavors are
    a no-op success. Failures are non-fatal and never raise.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table should have a compression policy.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    ### Imported locally so the debug branch of the error handler cannot hit a `NameError`.
    from meerschaum.utils.warnings import dprint

    if self.flavor not in ('timescaledb', 'timescaledb-ha'):
        return True, "Compression policies are only supported for TimescaleDB."

    if not pipe.parameters.get('compress', False):
        return True, "Compression is not enabled for this pipe."

    failure_msg = f"Failed to apply a compression policy to {pipe}."
    try:
        if not pipe.exists(debug=debug) or not self._is_hypertable(pipe, debug=debug):
            return True, f"{pipe} is not a hypertable; skipping compression policy."

        ### Enable the columnstore and add the policy in SEPARATE transactions
        ### (see timescale/timescaledb#8600).
        settings_success = all(self.exec_queries(
            [self._get_columnstore_settings_query(pipe)],
            break_on_error=True, rollback=True, silent=True, debug=debug,
        ))
        ### Don't attempt to add the policy if the columnstore could not be enabled.
        if not settings_success:
            return False, failure_msg

        policy_success = all(self.exec_queries(
            [self._get_columnstore_policy_query(pipe)],
            break_on_error=True, rollback=True, silent=True, debug=debug,
        ))
        if not policy_success:
            return False, failure_msg
    except Exception as e:
        msg = f"Failed to apply a compression policy to {pipe}:\n{e}"
        if debug:
            dprint(msg)
        return False, msg

    return True, f"Applied a compression policy to {pipe}."

Idempotently enable compression and install a compression policy for a pipe.

Intended to be called automatically (e.g. after a sync) when pipe.compress is set. Only TimescaleDB hypertables are affected; all other flavors are a no-op success. Failures are non-fatal and never raise.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table should have a compression policy.
Returns
  • A SuccessTuple indicating success.
def vacuum_pipe( self, pipe: meerschaum.Pipe, full: bool = False, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def vacuum_pipe(
    self,
    pipe: mrsm.Pipe,
    full: bool = False,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Run the flavor-specific vacuum queries against a pipe's target table
    and report how the table's size changed.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to vacuum.

    full: bool, default False
        Forwarded to `_get_vacuum_queries()` when building the statements
        (documented upstream as `VACUUM FULL` for the PostgreSQL family).

    debug: bool, default False
        Verbosity toggle. Queries are executed silently unless `debug` is set.

    Returns
    -------
    A `SuccessTuple`. On success, the message describes the size difference
    measured by `pipe.get_size()` before and after the vacuum
    (or a plain confirmation if either measurement is unavailable).
    """
    from meerschaum.utils.sql import sql_item_name
    from meerschaum.utils.formatting import format_bytes

    if not pipe.exists(debug=debug):
        return False, f"{pipe} does not exist; nothing to vacuum."

    flavor = self.flavor
    unsupported_msg = f"Vacuuming is not supported for flavor '{flavor}'."
    if flavor not in VACUUMABLE_FLAVORS:
        return False, unsupported_msg

    target = pipe.target
    schema = self.get_pipe_schema(pipe)
    target_name = sql_item_name(target, flavor, schema)
    vacuum_queries = self._get_vacuum_queries(pipe, target_name, full=full)
    if not vacuum_queries:
        return False, unsupported_msg

    bytes_before = pipe.get_size(debug=debug)
    quiet = not debug

    try:
        ### Some flavors must run their vacuum statements in autocommit mode.
        if flavor in _AUTOCOMMIT_VACUUM_FLAVORS:
            ok = self._run_in_autocommit(vacuum_queries, silent=quiet, debug=debug)
        else:
            results = self.exec_queries(
                vacuum_queries,
                break_on_error=True,
                rollback=True,
                silent=quiet,
                debug=debug,
            )
            ok = all(results)
    except Exception as e:
        return False, f"Failed to vacuum {pipe}:\n{e}"

    if not ok:
        return False, f"Failed to vacuum {pipe}."

    ### Invalidate the cached existence check before re-measuring the table.
    pipe._clear_cache_key('_exists', debug=debug)
    bytes_after = pipe.get_size(debug=debug)

    if bytes_before is None or bytes_after is None:
        return True, f"Vacuumed {pipe}."

    freed = bytes_before - bytes_after
    span = f"{format_bytes(bytes_before)} to {format_bytes(bytes_after)}"
    if freed > 0:
        return True, f"Reclaimed {format_bytes(freed)} ({span})."
    if freed < 0:
        return True, f"Size grew by {format_bytes(-freed)} ({span})."
    return True, f"Size unchanged ({format_bytes(bytes_before)})."

Reclaim dead-tuple disk space from a pipe's target table.

PostgreSQL-family tables run VACUUM (optionally VACUUM FULL); TimescaleDB hypertables recurse into their chunks; MySQL/MariaDB run OPTIMIZE TABLE; MSSQL rebuilds the table; SQLite vacuums the whole database file.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to vacuum.
  • full (bool, default False): If True (PostgreSQL family only), run VACUUM FULL, which rewrites the table and returns freed space to the operating system at the cost of an exclusive lock.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success, including the amount of disk reclaimed.
def analyze_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def analyze_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Run the flavor-specific statistics query (from `_get_analyze_query()`)
    against a pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to analyze.

    debug: bool, default False
        Verbosity toggle. The query is executed silently unless `debug` is set.

    Returns
    -------
    A `SuccessTuple` indicating success. Failure is returned (not raised)
    when the pipe does not exist, the flavor is unsupported,
    or the query fails.
    """
    from meerschaum.utils.sql import sql_item_name

    if not pipe.exists(debug=debug):
        return False, f"{pipe} does not exist; nothing to analyze."

    flavor = self.flavor
    unsupported_msg = f"Analyzing is not supported for flavor '{flavor}'."
    if flavor not in ANALYZABLE_FLAVORS:
        return False, unsupported_msg

    target = pipe.target
    schema = self.get_pipe_schema(pipe)
    target_name = sql_item_name(target, flavor, schema)
    analyze_query = self._get_analyze_query(pipe, target_name)
    if not analyze_query:
        return False, unsupported_msg

    try:
        results = self.exec_queries(
            [analyze_query],
            break_on_error=True,
            rollback=True,
            silent=(not debug),
            debug=debug,
        )
        ok = all(results)
    except Exception as e:
        return False, f"Failed to analyze {pipe}:\n{e}"

    if ok:
        return True, f"Analyzed {pipe}."
    return False, f"Failed to analyze {pipe}."

Refresh the database planner's statistics for a pipe's target table.

This does not reclaim disk space; it helps the query planner choose better plans after large syncs. PostgreSQL/SQLite run ANALYZE, MySQL/MariaDB run ANALYZE TABLE, and MSSQL runs UPDATE STATISTICS.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to analyze.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success.
def get_partition_info(self, pipe: meerschaum.Pipe, debug: bool = False) -> dict:
def get_partition_info(self, pipe: mrsm.Pipe, debug: bool = False) -> dict:
    """
    Summarize how a pipe's target table is partitioned.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to inspect.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary with the keys:

    - `flavor`: the connector flavor.
    - `partitioned`: `True` only if the table is a TimescaleDB hypertable
      or has a non-zero partition count.
    - `count`: the number of chunks / partitions (`None` if not partitioned).
    - `interval`: the value of `pipe.get_chunk_interval()` (`None` if not partitioned).

    If the pipe does not exist (or the existence check raises),
    the unpartitioned defaults are returned.
    """
    summary = dict(flavor=self.flavor, partitioned=False, count=None, interval=None)

    try:
        table_missing = not pipe.exists(debug=debug)
    except Exception:
        return summary
    if table_missing:
        return summary

    if self.flavor in _TIMESCALEDB_FLAVORS:
        if self._is_hypertable(pipe, debug=debug):
            summary['partitioned'] = True
            summary['count'] = self._get_chunk_count_timescaledb(pipe, debug=debug)
            summary['interval'] = pipe.get_chunk_interval(debug=debug)
        return summary

    if not self._should_partition(pipe):
        return summary

    ### Trust the table's actual state over the configured flag:
    ### a plain table which predates partitioning has no partitions to report.
    n_partitions = self._get_partition_count(pipe, debug=debug)
    if n_partitions:
        summary['partitioned'] = True
        summary['count'] = n_partitions
        summary['interval'] = pipe.get_chunk_interval(debug=debug)
    return summary

Return a summary of a pipe's target table partitioning for show partitions.

Keys:

  • flavor: the connector flavor.
  • partitioned: whether the table is range-partitioned (native) or a TimescaleDB hypertable.
  • count: the number of partitions / chunks (None if unknown).
  • interval: the physical partition width (timedelta, epoch-int, or None).
def partition_pipe( self, pipe: meerschaum.Pipe, chunk_minutes: Optional[int] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def partition_pipe(
    self,
    pipe: mrsm.Pipe,
    chunk_minutes: Optional[int] = None,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Rebuild a pipe's target table to a new partition (chunk) width.

    The width is taken from `chunk_minutes` if provided, else the pipe's configured
    `verify.chunk_minutes`, else the configured default
    (`pipes:parameters:verify:chunk_minutes`).
    The new width is persisted to the pipe's `verify.chunk_minutes` parameter.

    Strategy by flavor:

    - **TimescaleDB**: call `set_chunk_time_interval()`. This changes the width of FUTURE chunks
      only; existing chunks are not rewritten.
    - **Other partitionable flavors**: rebuild the table by reading its data,
      dropping it, and re-syncing at the new width.
      The whole table is read into memory; for very large tables
      consider a manual chunked rebuild. If the re-sync fails, the table has already
      been dropped and must be resynced from its source.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The partitioned pipe whose target table to repartition.

    chunk_minutes: Optional[int], default None
        The new partition width in minutes. Defaults to the pipe's `verify.chunk_minutes`.
        Must be a positive integer (booleans are rejected).

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    from meerschaum.config import get_config
    from meerschaum.utils.warnings import warn

    flavor = self.flavor
    if flavor not in (PARTITIONABLE_FLAVORS | _TIMESCALEDB_FLAVORS):
        return False, f"Repartitioning is not supported for flavor '{flavor}'."

    is_timescaledb = flavor in _TIMESCALEDB_FLAVORS
    if not is_timescaledb and not self._should_partition(pipe):
        return False, (
            f"{pipe} is not partitioned. Set `hypertable` to `True` (and define a `datetime` "
            "column) to enable native range partitioning."
        )

    if pipe.columns.get('datetime', None) is None:
        return False, f"{pipe} has no `datetime` column to partition by."

    if not pipe.exists(debug=debug):
        return False, f"{pipe} does not exist; nothing to repartition."

    new_minutes = (
        chunk_minutes
        if chunk_minutes is not None
        else (
            pipe.parameters.get('verify', {}).get('chunk_minutes', None)
            or get_config('pipes', 'parameters', 'verify', 'chunk_minutes')
        )
    )
    ### NOTE: `bool` is a subclass of `int`, so reject it explicitly
    ### (otherwise `True` would be accepted as a one-minute width).
    if (
        isinstance(new_minutes, bool)
        or not isinstance(new_minutes, int)
        or new_minutes <= 0
    ):
        return False, f"Invalid chunk interval '{new_minutes}'; must be a positive integer of minutes."

    ### TimescaleDB: native, no rewrite. Future chunks adopt the new interval.
    if is_timescaledb:
        from meerschaum.utils.sql import sql_item_name
        ### `set_chunk_time_interval` takes the hypertable as a `regclass`; pass the
        ### schema-qualified, quoted name as a string literal so it resolves unambiguously.
        pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
        regclass_literal = "'" + pipe_name.replace("'", "''") + "'"
        interval = pipe.get_chunk_interval(new_minutes, debug=debug)
        ### Integer intervals are passed through as-is; otherwise render as an `INTERVAL` literal.
        chunk_time_interval = (
            f"{interval}"
            if isinstance(interval, int)
            else f"INTERVAL '{int(interval.total_seconds() / 60)} MINUTES'"
        )
        query = f"SELECT set_chunk_time_interval({regclass_literal}, {chunk_time_interval})"
        try:
            success = self.exec(query, silent=(not debug), debug=debug) is not None
        except Exception as e:
            return False, f"Failed to set chunk interval for {pipe}:\n{e}"
        if not success:
            return False, f"Failed to set chunk interval for {pipe}."

        ### Check the persist result (as the rebuild path below does) so that
        ### a stale `verify.chunk_minutes` is not silently reported as success.
        update_success, update_msg = pipe.update_parameters(
            {'verify': {'chunk_minutes': new_minutes}}, persist=True, debug=debug
        )
        if not update_success:
            return False, (
                f"Set chunk interval for {pipe} to {new_minutes} minutes, "
                f"but failed to persist the new partition width:\n{update_msg}"
            )
        return True, (
            f"Set chunk interval for {pipe} to {new_minutes} minutes "
            "(applies to future chunks; existing chunks are unchanged)."
        )

    ### Non-TimescaleDB: rebuild via a drop + re-sync round-trip.
    current_interval = pipe.get_chunk_interval(debug=debug)
    new_interval = pipe.get_chunk_interval(new_minutes, debug=debug)
    if current_interval == new_interval:
        return True, f"{pipe} is already partitioned at {new_minutes} minutes."

    rowcount_before = pipe.get_rowcount(debug=debug)

    if debug:
        dprint(f"[{self}] Reading {pipe} data to rebuild partitions at {new_minutes} minutes.")
    df = pipe.get_data(debug=debug)
    if df is None:
        return False, f"Could not read data for {pipe}; aborting repartition."

    ### Persist the new width BEFORE recreating so the rebuild lays partitions at the new size.
    update_success, update_msg = pipe.update_parameters(
        {'verify': {'chunk_minutes': new_minutes}},
        persist=True,
        debug=debug,
    )
    if not update_success:
        return False, f"Failed to persist new partition width for {pipe}:\n{update_msg}"

    drop_success, drop_msg = pipe.drop(debug=debug)
    if not drop_success:
        return False, f"Failed to drop {pipe} during repartition:\n{drop_msg}"

    ### Re-sync the data we read so the table is recreated at the new width.
    sync_success, sync_msg = pipe.sync(df, debug=debug)
    if not sync_success:
        return False, (
            f"Repartition of {pipe} failed during re-sync; the table was dropped and must be "
            f"resynced from its source:\n{sync_msg}"
        )

    ### A row count mismatch is reported as a warning rather than a failure.
    rowcount_after = pipe.get_rowcount(debug=debug)
    if (
        rowcount_before is not None
        and rowcount_after is not None
        and rowcount_after != rowcount_before
    ):
        warn(
            f"Row count changed during repartition of {pipe} "
            f"({rowcount_before} -> {rowcount_after}).",
            stack=False,
        )

    return True, f"Repartitioned {pipe} to {new_minutes} minutes."

Rebuild a pipe's target table to a new partition (chunk) width.

The width is taken from chunk_minutes if provided, else the pipe's configured verify.chunk_minutes. The new width is persisted to verify.chunk_minutes, which is the authoritative partition width (see Pipe.get_chunk_interval).

Strategy by flavor:

  • TimescaleDB: call set_chunk_time_interval(). This changes the width of FUTURE chunks only; existing chunks are not rewritten.
  • PostgreSQL / PostGIS, MySQL / MariaDB, MSSQL: rebuild the table by reading its data, dropping it, and re-syncing at the new width. This reuses the tested create_pipe_table_from_df and _create_missing_partitions paths, and (for MSSQL) frees the partition function/scheme names so they can be recreated. The whole table is read into memory; for very large tables consider a manual chunked rebuild.
Parameters
  • pipe (mrsm.Pipe): The partitioned pipe whose target table to repartition.
  • chunk_minutes (Optional[int], default None): The new partition width in minutes. Defaults to the pipe's verify.chunk_minutes.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success.
def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False) -> Dict[int, Tuple[str, str, Optional[str], Dict[str, Any]]]:
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
) -> Dict[
        int, Tuple[str, str, Union[str, None], Dict[str, Any]]
    ]:
    """
    Return a dictionary mapping pipe IDs to key tuples corresponding to the parameters provided.

    Parameters
    ----------
    connector_keys: Optional[List[str]], default None
        List of connector_keys to search by.

    metric_keys: Optional[List[str]], default None
        List of metric_keys to search by.

    location_keys: Optional[List[str]], default None
        List of location_keys to search by.
        The values `'[None]'`, `'None'`, and `'null'` all match a null location.

    tags: Optional[List[str]], default None
        List of tags to search by.
        Each element may be a comma-separated group: tags within a group must all be present,
        and a pipe matches if it satisfies any group.
        Negated tags are always excluded.

    params: Optional[Dict[str, Any]], default None
        Dictionary of additional parameters to search by.
        E.g. `--params pipe_id:1`
        Scalar values beginning with the negation prefix are excluded rather than matched.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping each matching `pipe_id` to a tuple of
    (connector_keys, metric_key, location_key, parameters).
    An empty dictionary is returned if the `mrsm_pipes` table does not exist.
    """
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.misc import separate_negation_values
    from meerschaum.utils.sql import (
        OMIT_NULLSFIRST_FLAVORS,
        table_exists,
        json_flavors,
    )
    from meerschaum._internal.static import STATIC_CONFIG
    import json
    from copy import deepcopy
    sqlalchemy, sqlalchemy_sql_functions = attempt_import(
        'sqlalchemy',
        'sqlalchemy.sql.functions', lazy=False,
    )
    coalesce = sqlalchemy_sql_functions.coalesce

    if connector_keys is None:
        connector_keys = []
    if metric_keys is None:
        metric_keys = []
    if location_keys is None:
        location_keys = []
    else:
        ### Normalize the accepted spellings of a null location to 'None'.
        location_keys = [
            (
                lk
                if lk not in ('[None]', 'None', 'null')
                else 'None'
            )
            for lk in location_keys
        ]
    if tags is None:
        tags = []

    if params is None:
        params = {}

    ### Add three primary keys to params dictionary
    ###   (separated for convenience of arguments).
    cols = {
        'connector_keys': [str(ck) for ck in connector_keys],
        'metric_key': [str(mk) for mk in metric_keys],
        'location_key': [str(lk) for lk in location_keys],
    }

    ### Make deep copy so we don't mutate this somewhere else.
    parameters = deepcopy(params)
    for col, vals in cols.items():
        if vals not in [[], ['*']]:
            parameters[col] = vals

    if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug):
        return {}

    from meerschaum.connectors.sql.tables import get_tables
    pipes_tbl = get_tables(mrsm_instance=self, create=False, debug=debug)['pipes']

    ### Dictionaries are compared by their serialized JSON.
    _params = {}
    for k, v in parameters.items():
        _v = json.dumps(v) if isinstance(v, dict) else v
        _params[k] = _v

    negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix']
    ### Parse regular (scalar) params.
    ### If a value begins with the negation prefix, exclude it instead
    ### (comparing against the value with the prefix stripped).
    ### NULLs are coalesced to 'None' in both cases, mirroring the list handling below.
    _where = []
    for key, val in _params.items():
        if isinstance(val, (list, tuple)) or key not in pipes_tbl.c:
            continue
        coalesced_col = coalesce(pipes_tbl.c[key], 'None')
        val_str = str(val)
        if val_str.startswith(negation_prefix):
            _where.append(coalesced_col != val_str[len(negation_prefix):])
        else:
            _where.append(coalesced_col == val)

    if self.flavor in json_flavors:
        sqlalchemy_dialects = mrsm.attempt_import('sqlalchemy.dialects', lazy=False)
        JSONB = sqlalchemy_dialects.postgresql.JSONB
    else:
        JSONB = sqlalchemy.String

    select_cols = (
        [
            pipes_tbl.c.pipe_id,
            pipes_tbl.c.connector_keys,
            pipes_tbl.c.metric_key,
            pipes_tbl.c.location_key,
            pipes_tbl.c.parameters,
        ]
    )

    q = sqlalchemy.select(*select_cols).where(sqlalchemy.and_(True, *_where))
    for c, vals in cols.items():
        if not isinstance(vals, (list, tuple)) or not vals or c not in pipes_tbl.c:
            continue
        _in_vals, _ex_vals = separate_negation_values(vals)
        q = q.where(coalesce(pipes_tbl.c[c], 'None').in_(_in_vals)) if _in_vals else q
        q = q.where(coalesce(pipes_tbl.c[c], 'None').not_in(_ex_vals)) if _ex_vals else q

    ### Finally, parse tags.
    tag_groups = [tag.split(',') for tag in tags]
    in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]

    ### `ors`: one clause per tag group (any group may match).
    ### `nands`: one clause per excluded tag (all must hold).
    ors, nands = [], []
    if self.flavor in json_flavors:
        tags_jsonb = pipes_tbl.c['parameters'].cast(JSONB).op('->')('tags').cast(JSONB)
        for _in_tags, _ex_tags in in_ex_tag_groups:
            if _in_tags:
                ors.append(
                    sqlalchemy.and_(
                        tags_jsonb.contains(_in_tags)
                    )
                )
            for xt in _ex_tags:
                nands.append(
                    sqlalchemy.not_(
                        sqlalchemy.and_(
                            tags_jsonb.contains([xt])
                        )
                    )
                )
    else:
        ### Without JSON support, fall back to pattern-matching the serialized parameters.
        for _in_tags, _ex_tags in in_ex_tag_groups:
            sub_ands = []
            for nt in _in_tags:
                sub_ands.append(
                    sqlalchemy.cast(
                        pipes_tbl.c['parameters'],
                        sqlalchemy.String,
                    ).like(f'%"tags":%"{nt}"%')
                )
            if sub_ands:
                ors.append(sqlalchemy.and_(*sub_ands))

            for xt in _ex_tags:
                nands.append(
                    sqlalchemy.cast(
                        pipes_tbl.c['parameters'],
                        sqlalchemy.String,
                    ).not_like(f'%"tags":%"{xt}"%')
                )

    q = q.where(sqlalchemy.and_(*nands)) if nands else q
    q = q.where(sqlalchemy.or_(*ors)) if ors else q
    loc_asc = sqlalchemy.asc(pipes_tbl.c['location_key'])
    if self.flavor not in OMIT_NULLSFIRST_FLAVORS:
        loc_asc = sqlalchemy.nullsfirst(loc_asc)
    q = q.order_by(
        sqlalchemy.asc(pipes_tbl.c['connector_keys']),
        sqlalchemy.asc(pipes_tbl.c['metric_key']),
        loc_asc,
    )

    ### Execute the query and collect the rows as tuples.
    if debug:
        dprint(q)
    try:
        rows = (
            self.execute(q).fetchall()
            if self.flavor != 'duckdb'
            else [
                (
                    row['pipe_id'],
                    row['connector_keys'],
                    row['metric_key'],
                    row['location_key'],
                    row['parameters'],
                )
                for row in self.read(q).to_dict(orient='records')
            ]
        )
    except Exception as e:
        ### NOTE(review): this relies on `error()` raising; otherwise `rows` is unbound below.
        error(str(e))

    ### Key each row by its `pipe_id` (the first selected column).
    return {
        row[0]: row[1:]
        for row in rows
    }

Return a dictionary mapping pipe IDs to key tuples corresponding to the parameters provided.

Parameters
  • connector_keys (Optional[List[str]], default None): List of connector_keys to search by.
  • metric_keys (Optional[List[str]], default None): List of metric_keys to search by.
  • location_keys (Optional[List[str]], default None): List of location_keys to search by.
  • tags (Optional[List[str]], default None): List of tags to search by.
  • params (Optional[Dict[str, Any]], default None): Dictionary of additional parameters to search by. E.g. --params pipe_id:1
  • debug (bool, default False): Verbosity toggle.
Returns
  • A dictionary mapping pipe IDs to tuples of pipes' keys and parameters (connector_keys, metric_key, location_key, parameters).
def create_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, indices: Optional[List[str]] = None, debug: bool = False) -> bool:
def create_indices(
    self,
    pipe: mrsm.Pipe,
    columns: Optional[List[str]] = None,
    indices: Optional[List[str]] = None,
    debug: bool = False
) -> bool:
    """
    Create a pipe's indices.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices should be created.

    columns: Optional[List[str]], default None
        If provided (together with `indices`), only create the indices for these keys
        of `get_create_index_queries()`.

    indices: Optional[List[str]], default None
        Additional keys to include; combined with `columns`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    `True` if every selected index was created (or there was nothing to do),
    otherwise `False`.
    """
    if pipe.__dict__.get('_skip_check_indices', False):
        return True

    if debug:
        dprint(f"Creating indices for {pipe}...")

    if not pipe.indices:
        warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
        return True

    ### An empty selection means "create everything".
    requested = set((columns or []) + (indices or []))

    pipe._clear_cache_key('_columns_indices', debug=debug)
    selected_queries = {}
    for ix_key, ix_key_queries in self.get_create_index_queries(pipe, debug=debug).items():
        if requested and ix_key not in requested:
            continue
        selected_queries[ix_key] = ix_key_queries

    all_created = True
    for ix_key, ix_key_queries in selected_queries.items():
        results = self.exec_queries(ix_key_queries, debug=debug, silent=False)
        if all(results):
            continue
        all_created = False
        warn(f"Failed to create index on column: {ix_key}")

    return all_created

Create a pipe's indices.

def drop_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, indices: Optional[List[str]] = None, debug: bool = False) -> bool:
def drop_indices(
    self,
    pipe: mrsm.Pipe,
    columns: Optional[List[str]] = None,
    indices: Optional[List[str]] = None,
    debug: bool = False
) -> bool:
    """
    Drop a pipe's indices.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices should be dropped.

    columns: Optional[List[str]], default None
        If provided (together with `indices`), only drop the indices for these keys
        of `get_drop_index_queries()`.

    indices: Optional[List[str]], default None
        Additional keys to include; combined with `columns`.

    debug: bool, default False
        Verbosity toggle. Queries are executed silently unless `debug` is set.

    Returns
    -------
    `True` if every selected index was dropped.
    `False` if any drop failed or if the pipe has no indices.
    """
    if debug:
        dprint(f"Dropping indices for {pipe}...")

    if not pipe.indices:
        warn(f"No indices to drop for {pipe}.", stack=False)
        return False

    ### An empty selection means "drop everything".
    requested = set((columns or []) + (indices or []))

    selected_queries = {}
    for ix_key, ix_key_queries in self.get_drop_index_queries(pipe, debug=debug).items():
        if requested and ix_key not in requested:
            continue
        selected_queries[ix_key] = ix_key_queries

    all_dropped = True
    for ix_key, ix_key_queries in selected_queries.items():
        results = self.exec_queries(ix_key_queries, debug=debug, silent=(not debug))
        if all(results):
            continue
        all_dropped = False
        if debug:
            dprint(f"Failed to drop index on column: {ix_key}")
    return all_dropped

Drop a pipe's indices.

def get_create_index_queries( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[str]]:
def get_create_index_queries(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, List[str]]:
    """
    Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to which the queries will correspond.

    debug: bool, default False
        Verbosity toggle, forwarded to the metadata lookups.

    Returns
    -------
    A dictionary mapping to lists of queries.
    The keys are the datetime, primary, and id column names,
    the index keys of any other entries in `pipe.get_indices()`,
    and (for upsert pipes) the quoted name of the unique index.
    An empty dictionary is returned for DuckDB.
    """
    ### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly.
    if self.flavor == 'duckdb':
        return {}
    from meerschaum.utils.sql import (
        sql_item_name,
        get_distinct_col_count,
        UPDATE_QUERIES,
        get_null_replacement,
        get_create_table_queries,
        get_rename_table_queries,
        COALESCE_UNIQUE_INDEX_FLAVORS,
    )
    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum.utils.dtypes.sql import (
        get_db_type_from_pd_type,
        get_pd_type_from_db_type,
        AUTO_INCREMENT_COLUMN_FLAVORS,
    )
    from meerschaum.config import get_config
    index_queries = {}

    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
    static = pipe.parameters.get('static', False)
    null_indices = pipe.parameters.get('null_indices', True)
    index_names = pipe.get_indices()
    unique_index_name_unquoted = index_names.get('unique', None) or f'IX_{pipe.target}_unique'
    ### For upsert pipes, the unique index is built separately at the end of this function.
    if upsert:
        _ = index_names.pop('unique', None)
    indices = pipe.indices
    existing_cols_types = pipe.get_columns_types(debug=debug)
    existing_cols_pd_types = {
        col: get_pd_type_from_db_type(typ)
        for col, typ in existing_cols_types.items()
    }

    ### Collect the (lower-cased) names of indices and primary keys which already exist.
    existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
    existing_ix_names = set()
    existing_primary_keys = []
    existing_clustered_primary_keys = []
    for col, col_indices in existing_cols_indices.items():
        for col_ix_doc in col_indices:
            existing_ix_names.add(col_ix_doc.get('name', '').lower())
            if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                existing_primary_keys.append(col.lower())
                if col_ix_doc.get('clustered', True):
                    existing_clustered_primary_keys.append(col.lower())

    _datetime = pipe.get_columns('datetime', error=False)
    _datetime_name = (
        sql_item_name(_datetime, self.flavor, None)
        if _datetime is not None else None
    )
    _datetime_index_name = (
        sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
        if index_names.get('datetime', None)
        else None
    )
    _id = pipe.get_columns('id', error=False)
    _id_name = (
        sql_item_name(_id, self.flavor, None)
        if _id is not None
        else None
    )
    primary_key = pipe.columns.get('primary', None)
    primary_key_name = (
        sql_item_name(primary_key, flavor=self.flavor, schema=None)
        if primary_key
        else None
    )
    ### A primary key which is missing from the table is treated as auto-incrementing.
    autoincrement = (
        pipe.parameters.get('autoincrement', False)
        or (
            primary_key is not None
            and primary_key not in existing_cols_pd_types
        )
    )
    primary_key_db_type = (
        get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int') or 'int', self.flavor)
        if primary_key
        else None
    )
    primary_key_constraint_name = (
        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
        if primary_key is not None
        else None
    )
    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_primary_keys and _datetime is not None
        else "NONCLUSTERED"
    )
    include_columns_str = "\n    ,".join(
        [
            sql_item_name(col, flavor=self.flavor) for col in existing_cols_types
            if col != _datetime
        ]
    ).rstrip(',')
    include_clause = (
        (
            f"\nINCLUDE (\n    {include_columns_str}\n)"
        )
        if datetime_clustered == 'NONCLUSTERED'
        else ''
    )

    _id_index_name = (
        sql_item_name(index_names['id'], self.flavor, None)
        if index_names.get('id', None)
        else None
    )
    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
    _create_space_partition = get_config('system', 'experimental', 'space')

    ### create datetime index
    dt_query = None
    if _datetime is not None:
        if (
            self.flavor in ('timescaledb', 'timescaledb-ha')
            and pipe.parameters.get('hypertable', True)
        ):
            _id_count = (
                get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
                if (_id is not None and _create_space_partition) else None
            )

            ### Integer chunk intervals are passed through as-is;
            ### timedelta intervals are expressed in minutes.
            chunk_interval = pipe.get_chunk_interval(debug=debug)
            chunk_interval_minutes = (
                chunk_interval
                if isinstance(chunk_interval, int)
                else int(chunk_interval.total_seconds() / 60)
            )
            chunk_time_interval = (
                f"INTERVAL '{chunk_interval_minutes} MINUTES'"
                if isinstance(chunk_interval, timedelta)
                else f'{chunk_interval_minutes}'
            )

            dt_query = (
                f"SELECT public.create_hypertable('{_pipe_name}', " +
                f"'{_datetime}', "
                + (
                    f"'{_id}', {_id_count}, " if (_id is not None and _create_space_partition)
                    else ''
                )
                + f'chunk_time_interval => {chunk_time_interval}, '
                + 'if_not_exists => true, '
                + "migrate_data => true);"
            )
        elif _datetime_index_name and _datetime != primary_key:
            if self.flavor == 'mssql':
                dt_query = (
                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
                    f"\nON {_pipe_name} ({_datetime_name}){include_clause}"
                )
            else:
                dt_query = (
                    f"CREATE INDEX {_datetime_index_name} "
                    + f"ON {_pipe_name} ({_datetime_name})"
                )

    if dt_query:
        index_queries[_datetime] = [dt_query]

    ### create primary key
    primary_queries = []
    if (
        primary_key is not None
        and primary_key.lower() not in existing_primary_keys
        and not static
    ):
        if autoincrement and primary_key not in existing_cols_pd_types:
            ### The column does not exist yet: add it as an auto-incrementing column.
            autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
                self.flavor,
                AUTO_INCREMENT_COLUMN_FLAVORS['default']
            )
            primary_queries.extend([
                (
                    f"ALTER TABLE {_pipe_name}\n"
                    f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
                ),
            ])
        elif not autoincrement and primary_key in existing_cols_pd_types:
            ### The column exists: promote it to a primary key with flavor-specific syntax.
            if self.flavor in ('sqlite', 'geopackage'):
                ### Rebuild the table with the primary key: create `_new_<target>`,
                ### copy the rows, drop the old table, and rename the new one.
                new_table_name = sql_item_name(
                    f'_new_{pipe.target}',
                    self.flavor,
                    self.get_pipe_schema(pipe)
                )
                select_cols_str = ', '.join(
                    [
                        sql_item_name(col, self.flavor, None)
                        for col in existing_cols_types
                    ]
                )
                primary_queries.extend(
                    get_create_table_queries(
                        existing_cols_pd_types,
                        f'_new_{pipe.target}',
                        self.flavor,
                        schema=self.get_pipe_schema(pipe),
                        primary_key=primary_key,
                    ) + [
                        (
                            f"INSERT INTO {new_table_name} ({select_cols_str})\n"
                            f"SELECT {select_cols_str}\nFROM {_pipe_name}"
                        ),
                        f"DROP TABLE {_pipe_name}",
                    ] + get_rename_table_queries(
                        f'_new_{pipe.target}',
                        pipe.target,
                        self.flavor,
                        schema=self.get_pipe_schema(pipe),
                    )
                )
            elif self.flavor == 'oracle':
                primary_queries.extend([
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"MODIFY {primary_key_name} NOT NULL"
                    ),
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
                    )
                ])
            elif self.flavor in ('mysql', 'mariadb'):
                primary_queries.extend([
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
                    ),
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
                    )
                ])
            elif self.flavor in ('timescaledb', 'timescaledb-ha'):
                ### The datetime column (if any) is prepended to the primary key.
                primary_queries.extend([
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ALTER COLUMN {primary_key_name} SET NOT NULL"
                    ),
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
                            f"{_datetime_name}, " if _datetime_name else ""
                        ) + f"{primary_key_name})"
                    ),
                ])
            elif self.flavor in ('citus', 'postgresql', 'duckdb', 'postgis'):
                primary_queries.extend([
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ALTER COLUMN {primary_key_name} SET NOT NULL"
                    ),
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
                    ),
                ])
            else:
                primary_queries.extend([
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
                    ),
                    (
                        f"ALTER TABLE {_pipe_name}\n"
                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
                    ),
                ])
        index_queries[primary_key] = primary_queries

    ### create id index
    if _id_name is not None:
        if self.flavor in ('timescaledb', 'timescaledb-ha'):
            ### Already created indices via create_hypertable.
            id_query = (
                None if (_id is not None and _create_space_partition)
                else (
                    f"CREATE INDEX IF NOT EXISTS {_id_index_name} ON {_pipe_name} ({_id_name})"
                    if _id is not None
                    else None
                )
            )
        else: ### mssql, sqlite, etc.
            id_query = (
                None
                if _is_non_indexable_col(_id, existing_cols_types, self.flavor)
                else f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})"
            )

        if id_query is not None:
            index_queries[_id] = id_query if isinstance(id_query, list) else [id_query]

    ### Create indices for other labels in `pipe.columns`.
    other_index_names = {
        ix_key: ix_unquoted
        for ix_key, ix_unquoted in index_names.items()
        if (
            ix_key not in ('datetime', 'id', 'primary')
            and ix_unquoted.lower() not in existing_ix_names
        )
    }
    for ix_key, ix_unquoted in other_index_names.items():
        ix_name = sql_item_name(ix_unquoted, self.flavor, None)
        cols = indices[ix_key]
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
        if ix_key == 'unique' and upsert:
            continue
        if self.flavor in ('mysql', 'mariadb', 'mssql'):
            cols = [
                col for col in cols
                if col and not _is_non_indexable_col(
                    col, existing_cols_types, self.flavor
                )
            ]
        cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
        if not cols_names:
            continue

        cols_names_str = ", ".join(cols_names)
        index_query_params_clause = f" ({cols_names_str})"
        if self.flavor in ('postgis', 'timescaledb-ha'):
            ### Use a GIST index if any of the indexed columns is a geometry column.
            ### NOTE: Inspect each column in turn (not only the first one).
            for col in cols:
                col_typ = existing_cols_pd_types.get(col, 'object')
                if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'):
                    index_query_params_clause = f" USING GIST ({cols_names_str})"
                    break

        index_queries[ix_key] = [
            f"CREATE INDEX {ix_name} ON {_pipe_name}{index_query_params_clause}"
        ]

    ### Build the unique index / constraint over the existing `pipe.columns` (used for upserts).
    indices_cols_str = ', '.join(
        list({
            sql_item_name(ix, self.flavor)
            for ix_key, ix in pipe.columns.items()
            if ix and ix in existing_cols_types
        })
    )
    coalesce_indices_cols_str = ', '.join(
        [
            (
                (
                    "COALESCE("
                    + sql_item_name(ix, self.flavor)
                    + ", "
                    + get_null_replacement(existing_cols_types[ix], self.flavor)
                    + ") "
                )
                if ix_key != 'datetime' and null_indices
                else sql_item_name(ix, self.flavor)
            )
            for ix_key, ix in pipe.columns.items()
            if ix and ix in existing_cols_types
        ]
    )
    unique_index_name = sql_item_name(unique_index_name_unquoted, self.flavor)
    constraint_name_unquoted = unique_index_name_unquoted.replace('IX_', 'UQ_')
    constraint_name = sql_item_name(constraint_name_unquoted, self.flavor)
    add_constraint_query = (
        f"ALTER TABLE {_pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})"
    )
    unique_index_cols_str = (
        indices_cols_str
        if self.flavor not in COALESCE_UNIQUE_INDEX_FLAVORS or not null_indices
        else coalesce_indices_cols_str
    )
    create_unique_index_query = (
        f"CREATE UNIQUE INDEX {unique_index_name} ON {_pipe_name} ({unique_index_cols_str})"
    )
    constraint_queries = [create_unique_index_query]
    if self.flavor not in ('sqlite', 'geopackage'):
        constraint_queries.append(add_constraint_query)
    if upsert and indices_cols_str and unique_index_name_unquoted.lower() not in existing_ix_names:
        index_queries[unique_index_name] = constraint_queries
        ### Remove regular indices that cover the same single column as the unique index.
        ### Some flavors (e.g. Oracle) reject two indices on the same column combination.
        if unique_index_cols_str == _id_name:
            index_queries.pop(_id, None)
        if unique_index_cols_str == _datetime_name:
            index_queries.pop(_datetime, None)
    return index_queries

Return a dictionary mapping columns to a CREATE INDEX or equivalent query.

Parameters
  • pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
  • A dictionary of index names mapping to lists of queries.
def get_drop_index_queries( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[str]]:
 936def get_drop_index_queries(
 937    self,
 938    pipe: mrsm.Pipe,
 939    debug: bool = False,
 940) -> Dict[str, List[str]]:
 941    """
 942    Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.
 943
 944    Parameters
 945    ----------
 946    pipe: mrsm.Pipe
 947        The pipe to which the queries will correspond.
 948
 949    Returns
 950    -------
 951    A dictionary of column names mapping to lists of queries.
 952    """
 953    ### NOTE: Due to breaking changes within DuckDB, indices must be skipped.
 954    if self.flavor == 'duckdb':
 955        return {}
 956    if not pipe.exists(debug=debug):
 957        return {}
 958
 959    from collections import defaultdict
 960    from meerschaum.utils.sql import (
 961        sql_item_name,
 962        table_exists,
 963        hypertable_queries,
 964        DROP_INDEX_IF_EXISTS_FLAVORS,
 965    )
 966    drop_queries = defaultdict(lambda: [])
 967    schema = self.get_pipe_schema(pipe)
 968    index_schema = schema if self.flavor != 'mssql' else None
 969    indices = {
 970        ix_key: ix
 971        for ix_key, ix in pipe.get_indices().items()
 972    }
 973    cols_indices = pipe.get_columns_indices(debug=debug)
 974    existing_indices = set()
 975    clustered_ix = None
 976    for col, ix_metas in cols_indices.items():
 977        for ix_meta in ix_metas:
 978            ix_name = ix_meta.get('name', None)
 979            if ix_meta.get('clustered', False):
 980                clustered_ix = ix_name
 981            existing_indices.add(ix_name.lower())
 982    pipe_name = sql_item_name(pipe.target, self.flavor, schema)
 983    pipe_name_no_schema = sql_item_name(pipe.target, self.flavor, None)
 984    upsert = pipe.upsert
 985
 986    if self.flavor not in hypertable_queries:
 987        is_hypertable = False
 988    else:
 989        is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name)
 990        is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None
 991
 992    if_exists_str = "IF EXISTS " if self.flavor in DROP_INDEX_IF_EXISTS_FLAVORS else ""
 993    if is_hypertable:
 994        nuke_queries = []
 995        temp_table = '_' + pipe.target + '_temp_migration'
 996        temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))
 997
 998        if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
 999            nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}")
1000        nuke_queries += [
1001            f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
1002            f"DROP TABLE {if_exists_str}{pipe_name}",
1003            f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}",
1004        ]
1005        nuke_ix_keys = ('datetime', 'id')
1006        nuked = False
1007        for ix_key in nuke_ix_keys:
1008            if ix_key in indices and not nuked:
1009                drop_queries[ix_key].extend(nuke_queries)
1010                nuked = True
1011
1012    for ix_key, ix_unquoted in indices.items():
1013        if ix_key in drop_queries:
1014            continue
1015        if ix_unquoted.lower() not in existing_indices:
1016            continue
1017
1018        if (
1019            ix_key == 'unique'
1020            and upsert
1021            and self.flavor not in ('sqlite', 'geopackage')
1022            and not is_hypertable
1023        ):
1024            constraint_name_unquoted = ix_unquoted.replace('IX_', 'UQ_')
1025            constraint_name = sql_item_name(constraint_name_unquoted, self.flavor)
1026            constraint_or_index = (
1027                "CONSTRAINT"
1028                if self.flavor not in ('mysql', 'mariadb')
1029                else 'INDEX'
1030            )
1031            drop_queries[ix_key].append(
1032                f"ALTER TABLE {pipe_name}\n"
1033                f"DROP {constraint_or_index} {constraint_name}"
1034            )
1035
1036        query = (
1037            (
1038                f"ALTER TABLE {pipe_name}\n"
1039                if self.flavor in ('mysql', 'mariadb')
1040                else ''
1041            )
1042            + f"DROP INDEX {if_exists_str}"
1043            + sql_item_name(ix_unquoted, self.flavor, index_schema)
1044        )
1045        if self.flavor == 'mssql':
1046            query += f"\nON {pipe_name}"
1047            if ix_unquoted == clustered_ix:
1048                query += "\nWITH (ONLINE = ON, MAXDOP = 4)"
1049        drop_queries[ix_key].append(query)
1050
1051
1052    return drop_queries

Return a dictionary mapping columns to a DROP INDEX or equivalent query.

Parameters
  • pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
  • A dictionary of column names mapping to lists of queries.
def get_add_columns_queries( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, Dict[str, str]]', _is_db_types: bool = False, debug: bool = False) -> List[str]:
def get_add_columns_queries(
    self,
    pipe: mrsm.Pipe,
    df: Union[pd.DataFrame, Dict[str, str]],
    _is_db_types: bool = False,
    debug: bool = False,
) -> List[str]:
    """
    Add new null columns of the correct type to a table from a dataframe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be altered.

    df: Union[pd.DataFrame, Dict[str, str]]
        The pandas DataFrame which contains new columns.
        If a dictionary is provided, assume it maps columns to Pandas data types.

    _is_db_types: bool, default False
        If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.
        NOTE(review): this flag is not read anywhere in this function's body;
        confirm whether callers still depend on it.

    debug: bool, default False
        Verbosity toggle, forwarded to the metadata lookups.

    Returns
    -------
    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
    An empty list is returned if the pipe does not exist, is static,
    or there are no usable new columns.
    """
    if not pipe.exists(debug=debug):
        return []

    if pipe.parameters.get('static', False):
        return []

    from decimal import Decimal
    import copy
    from meerschaum.utils.sql import (
        sql_item_name,
        SINGLE_ALTER_TABLE_FLAVORS,
        get_table_cols_types,
    )
    from meerschaum.utils.dtypes.sql import (
        get_pd_type_from_db_type,
        get_db_type_from_pd_type,
    )
    from meerschaum.utils.misc import flatten_list
    is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False
    if is_dask:
        df = df.partitions[0].compute()
    df_cols_types = (
        {
            col: str(typ)
            for col, typ in df.dtypes.items()
        }
        if not isinstance(df, dict)
        else copy.deepcopy(df)
    )

    ### Refine `object` columns by inspecting the values in the first row.
    if not isinstance(df, dict) and len(df.index) > 0:
        for col, typ in list(df_cols_types.items()):
            if typ != 'object':
                continue
            val = df.iloc[0][col]
            if isinstance(val, (dict, list)):
                df_cols_types[col] = 'json'
            elif isinstance(val, Decimal):
                df_cols_types[col] = 'numeric'
            elif isinstance(val, str):
                df_cols_types[col] = 'str'
    db_cols_types = {
        col: get_pd_type_from_db_type(typ)
        for col, typ in get_table_cols_types(
            pipe.target,
            self,
            schema=self.get_pipe_schema(pipe),
            debug=debug,
        ).items()
    }

    ### Keep the incoming column order so that the generated queries are deterministic.
    new_cols = [col for col in df_cols_types if col not in db_cols_types]
    if not new_cols:
        return []

    new_cols_types = {
        col: get_db_type_from_pd_type(
            df_cols_types[col],
            self.flavor
        )
        for col in new_cols
        if col and df_cols_types.get(col, None)
    }
    ### Every candidate may have been filtered out (e.g. empty name or dtype);
    ### return early rather than emit a truncated `ALTER TABLE` statement.
    if not new_cols_types:
        return []

    alter_table_query = "ALTER TABLE " + sql_item_name(
        pipe.target, self.flavor, self.get_pipe_schema(pipe)
    )
    add_col_clauses = [
        "\nADD " + sql_item_name(col, self.flavor, None) + " " + typ
        for col, typ in new_cols_types.items()
    ]

    ### For most flavors, only one query is required.
    ### Flavors in `SINGLE_ALTER_TABLE_FLAVORS` (e.g. SQLite) require one query per column.
    if self.flavor in SINGLE_ALTER_TABLE_FLAVORS:
        queries = [alter_table_query + clause for clause in add_col_clauses]
    else:
        queries = [alter_table_query + ",".join(add_col_clauses)]

    if self.flavor != 'duckdb':
        return queries

    ### NOTE: For DuckDB, we must drop and rebuild the indices.
    drop_index_queries = list(flatten_list(
        list(self.get_drop_index_queries(pipe, debug=debug).values())
    ))
    create_index_queries = list(flatten_list(
        list(self.get_create_index_queries(pipe, debug=debug).values())
    ))

    return drop_index_queries + queries + create_index_queries

Add new null columns of the correct type to a table from a dataframe.

Parameters
  • pipe (mrsm.Pipe): The pipe to be altered.
  • df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which contains new columns. If a dictionary is provided, assume it maps columns to Pandas data types.
  • _is_db_types (bool, default False): If True, assume df is a dictionary mapping columns to SQL native dtypes.
Returns
  • A list of the ALTER TABLE SQL query or queries to be executed on the provided connector.
def get_alter_columns_queries( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, Dict[str, str]]', debug: bool = False) -> List[str]:
def get_alter_columns_queries(
    self,
    pipe: mrsm.Pipe,
    df: Union[pd.DataFrame, Dict[str, str]],
    debug: bool = False,
) -> List[str]:
    """
    If we encounter a column of a different type, set the entire column to text.
    If the altered columns are numeric, alter to numeric instead.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be altered.

    df: Union[pd.DataFrame, Dict[str, str]]
        The pandas DataFrame which may contain altered columns.
        If a dict is provided, assume it maps columns to Pandas data types.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
    An empty list is returned when the pipe does not exist, is static,
    or when no column needs to be altered.
    """
    if not pipe.exists(debug=debug) or pipe.static:
        return []

    from meerschaum.utils.sql import (
        sql_item_name,
        get_table_cols_types,
        DROP_IF_EXISTS_FLAVORS,
        SINGLE_ALTER_TABLE_FLAVORS,
    )
    from meerschaum.utils.dataframe import get_numeric_cols
    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum.utils.dtypes.sql import (
        get_pd_type_from_db_type,
        get_db_type_from_pd_type,
    )
    from meerschaum.utils.misc import flatten_list, generate_password, items_str

    target = pipe.target
    schema = self.get_pipe_schema(pipe)
    ### The fully-qualified table name is shared by every query built below.
    table_name = sql_item_name(target, self.flavor, schema)

    df_is_dict = isinstance(df, dict)
    numeric_cols = (
        [col for col, typ in df.items() if str(typ).startswith('numeric')]
        if df_is_dict
        else get_numeric_cols(df)
    )
    df_cols_types = (
        df
        if df_is_dict
        else {col: str(typ) for col, typ in df.dtypes.items()}
    )
    ### NOTE: The database types are normalized to their Pandas equivalents for comparison.
    db_cols_types = {
        col: get_pd_type_from_db_type(typ)
        for col, typ in get_table_cols_types(
            pipe.target,
            self,
            schema=schema,
            debug=debug,
        ).items()
    }
    pipe_dtypes = pipe.get_dtypes(debug=debug)
    pipe_bool_cols = [
        col
        for col, typ in pipe_dtypes.items()
        if are_dtypes_equal(str(typ), 'bool')
    ]

    ### Pairs of (database type fragment -> dataframe type fragment)
    ### which should not be treated as a real change of type.
    pd_db_df_aliases = {
        'int': 'bool',
        'float': 'bool',
        'numeric': 'bool',
        'guid': 'object',
    }
    if self.flavor == 'oracle':
        pd_db_df_aliases.update({
            'int': 'numeric',
            'date': 'datetime',
            'numeric': 'int',
        })
    elif self.flavor == 'geopackage':
        pd_db_df_aliases.update({
            'geometry': 'bytes',
            'bytes': 'geometry',
        })

    ### Map each mismatched column to its `(database type, dataframe type)` pair.
    ### Columns which are already text in the database are left alone.
    altered_cols = {
        col: (db_cols_types.get(col, 'object'), typ)
        for col, typ in df_cols_types.items()
        if not are_dtypes_equal(typ, db_cols_types.get(col, 'object').lower())
        and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
    }

    if debug and altered_cols:
        dprint("Columns to be altered:")
        mrsm.pprint(altered_cols)

    ### NOTE: Special columns (numerics, bools, etc.) are captured and cached upon detection.
    new_special_cols = pipe._get_cached_value('new_special_cols', debug=debug) or {}
    new_special_db_cols_types = {
        col: (db_cols_types.get(col, 'object'), typ)
        for col, typ in new_special_cols.items()
    }
    if debug:
        dprint("Cached new special columns:")
        mrsm.pprint(new_special_cols)
        dprint("New special columns db types:")
        mrsm.pprint(new_special_db_cols_types)

    altered_cols.update(new_special_db_cols_types)

    ### NOTE: Sometimes bools are coerced into ints or floats.
    ### Newly detected special columns are never ignored.
    altered_cols_to_ignore = {
        col
        for col, (db_typ, df_typ) in altered_cols.items()
        if col not in new_special_cols
        and any(
            db_alias in db_typ.lower() and df_alias in df_typ.lower()
            for db_alias, df_alias in pd_db_df_aliases.items()
        )
    }

    ### Oracle's bool handling sometimes mixes NUMBER and INT.
    bool_compatible_types = ('int', 'float', 'numeric', 'bool')
    for bool_col in pipe_bool_cols:
        if bool_col not in altered_cols:
            continue
        db_typ, df_typ = altered_cols[bool_col]
        db_is_bool_compatible = any(
            are_dtypes_equal(compatible_typ, db_typ)
            for compatible_typ in bool_compatible_types
        )
        df_is_bool_compatible = any(
            are_dtypes_equal(compatible_typ, df_typ)
            for compatible_typ in bool_compatible_types
        )
        if db_is_bool_compatible and df_is_bool_compatible:
            altered_cols_to_ignore.add(bool_col)

    if debug and altered_cols_to_ignore:
        dprint("Ignoring the following altered columns (false positives).")
        mrsm.pprint(altered_cols_to_ignore)

    for col in altered_cols_to_ignore:
        _ = altered_cols.pop(col, None)

    if not altered_cols:
        return []

    if numeric_cols:
        ### Persist the newly detected numeric columns in the pipe's explicit dtypes.
        explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
        explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
        pipe.dtypes = explicit_pipe_dtypes
        if not pipe.temporary:
            edit_success, edit_msg = pipe.edit(debug=debug)
            if not edit_success:
                warn(
                    f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
                    + f"{edit_msg}"
                )
    else:
        ### Fall back to the numeric columns already configured on the pipe.
        ### `str()` guards against dtypes which are not plain strings.
        numeric_cols = [
            col
            for col, typ in pipe_dtypes.items()
            if str(typ).startswith('numeric')
        ]

    numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
    text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
    altered_cols_types = {
        col: (numeric_type if col in numeric_cols else text_type)
        for col in altered_cols
    }

    if self.flavor in ('sqlite', 'geopackage'):
        ### These flavors are handled by rebuilding the table:
        ### rename the original, recreate it with the new types, copy the rows, drop the original.
        session_id = generate_password(3)
        temp_table_name = '-' + session_id + '_' + target
        schema_temp_table_name = sql_item_name(temp_table_name, self.flavor, schema)

        rename_query = (
            "ALTER TABLE "
            + table_name
            + " RENAME TO "
            + sql_item_name(temp_table_name, self.flavor, None)
        )

        column_definitions = [
            sql_item_name(col_name, self.flavor, None)
            + " "
            + (
                col_typ
                if col_name not in altered_cols
                else altered_cols_types[col_name]
            )
            for col_name, col_typ in db_cols_types.items()
        ]
        create_query = (
            "CREATE TABLE "
            + table_name
            + " (\n"
            + ",\n".join(column_definitions)
            + "\n)"
        )

        select_expressions = [
            (
                sql_item_name(col_name, self.flavor, None)
                if col_name not in altered_cols
                else (
                    "CAST("
                    + sql_item_name(col_name, self.flavor, None)
                    + " AS "
                    + altered_cols_types[col_name]
                    + ")"
                )
            )
            for col_name in db_cols_types
        ]
        insert_query = (
            "INSERT INTO "
            + table_name
            + ' ('
            + ', '.join([
                sql_item_name(col_name, self.flavor, None)
                for col_name in db_cols_types
            ])
            + ')'
            + "\nSELECT\n"
            + ",\n".join(select_expressions)
            + f"\nFROM {schema_temp_table_name}"
        )

        ### Keep a space between `IF EXISTS` and the table name.
        if_exists_str = "IF EXISTS " if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
        drop_query = "DROP TABLE " + if_exists_str + schema_temp_table_name

        return [
            rename_query,
            create_query,
            insert_query,
            drop_query,
        ]

    if self.flavor == 'oracle':
        ### Oracle is handled by staging the values in `<col>_temp` columns:
        ### add temp columns, copy values over, null out the originals,
        ### modify the original types, copy values back, then drop the temp columns.
        ### Each step is emitted for every column before the next step begins.
        quoted_cols = {
            col: (
                sql_item_name(col, self.flavor, None),
                sql_item_name(col + '_temp', self.flavor, None),
            )
            for col in altered_cols_types
        }
        add_queries = [
            "ALTER TABLE " + table_name + "\nADD " + quoted_cols[col][1] + " " + typ
            for col, typ in altered_cols_types.items()
        ]
        populate_temp_queries = [
            "UPDATE " + table_name + "\nSET " + temp_name + ' = ' + col_name
            for col_name, temp_name in quoted_cols.values()
        ]
        set_old_cols_to_null_queries = [
            "UPDATE " + table_name + "\nSET " + col_name + ' = NULL'
            for col_name, _ in quoted_cols.values()
        ]
        alter_type_queries = [
            "ALTER TABLE " + table_name + "\nMODIFY " + quoted_cols[col][0] + ' ' + typ
            for col, typ in altered_cols_types.items()
        ]
        set_old_to_temp_queries = [
            "UPDATE " + table_name + "\nSET " + col_name + ' = ' + temp_name
            for col_name, temp_name in quoted_cols.values()
        ]
        drop_temp_queries = [
            "ALTER TABLE " + table_name + "\nDROP COLUMN " + temp_name
            for _, temp_name in quoted_cols.values()
        ]
        return (
            add_queries
            + populate_temp_queries
            + set_old_cols_to_null_queries
            + alter_type_queries
            + set_old_to_temp_queries
            + drop_temp_queries
        )

    ### NOTE: Oracle has already returned above, so only the remaining flavors are considered.
    alter_col_prefix = 'MODIFY' if self.flavor in ('mysql', 'mariadb') else 'ALTER'
    type_prefix = '' if self.flavor in ('mssql', 'mariadb', 'mysql') else 'TYPE '
    alter_table_prefix = "ALTER TABLE " + table_name
    alter_clauses = [
        f"\n{alter_col_prefix} COLUMN "
        + sql_item_name(col, self.flavor, None)
        + " " + type_prefix + typ
        for col, typ in altered_cols_types.items()
    ]

    if self.flavor in SINGLE_ALTER_TABLE_FLAVORS:
        ### These flavors require one `ALTER TABLE` statement per column.
        queries = [alter_table_prefix + clause for clause in alter_clauses]
    else:
        queries = [alter_table_prefix + ",".join(alter_clauses)]

    if self.flavor != 'duckdb':
        return queries

    ### NOTE: For DuckDB, the indices are dropped before and rebuilt after altering.
    drop_index_queries = list(flatten_list(
        list(self.get_drop_index_queries(pipe, debug=debug).values())
    ))
    create_index_queries = list(flatten_list(
        list(self.get_create_index_queries(pipe, debug=debug).values())
    ))

    return drop_index_queries + queries + create_index_queries

If we encounter a column of a different type, set the entire column to text. If the altered columns are numeric, alter to numeric instead.

Parameters
  • pipe (mrsm.Pipe): The pipe to be altered.
  • df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which may contain altered columns. If a dict is provided, assume it maps columns to Pandas data types.
Returns
  • A list of the ALTER TABLE SQL query or queries to be executed on the provided connector.
def delete_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
def delete_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> SuccessTuple:
    """
    Delete a pipe's row from the instance's `pipes` registration table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose registration should be removed.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success flag and message.
    The flag is `False` if the pipe has no ID or the `DELETE` statement fails.
    """
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    if not pipe.id:
        return False, f"{pipe} is not registered."

    ### Fetch the `pipes` table definition; `create` is disabled for temporary pipes.
    from meerschaum.connectors.sql.tables import get_tables
    instance_tables = get_tables(
        mrsm_instance=self,
        create=(not pipe.temporary),
        debug=debug,
    )
    pipes_table = instance_tables['pipes']

    delete_statement = sqlalchemy.delete(pipes_table).where(
        pipes_table.c.pipe_id == pipe.id
    )
    delete_result = self.exec(delete_statement, debug=debug)
    if delete_result:
        return True, "Success"

    return False, f"Failed to delete registration for {pipe}."

Delete a Pipe's registration.

def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, str, NoneType] = None, end: Union[datetime.datetime, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: str = 'asc', limit: Optional[int] = None, begin_add_minutes: int = 0, end_add_minutes: int = 0, chunksize: Optional[int] = -1, as_iterator: bool = False, debug: bool = False, **kw: Any) -> 'Union[pd.DataFrame, None]':
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, str, None] = None,
    end: Union[datetime, str, None] = None,
    params: Optional[Dict[str, Any]] = None,
    order: Optional[str] = 'asc',
    limit: Optional[int] = None,
    begin_add_minutes: int = 0,
    end_add_minutes: int = 0,
    chunksize: Optional[int] = -1,
    as_iterator: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union[pd.DataFrame, None]:
    """
    Access a pipe's data from the SQL instance.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to get data from.

    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, str, None], default None
        If provided, get rows newer than or equal to this value.

    end: Union[datetime, str, None], default None
        If provided, get rows older than this value (exclusive upper bound).

    params: Optional[Dict[str, Any]], default None
        Additional parameters to filter by.
        See `meerschaum.connectors.sql.build_where`.

    order: Optional[str], default 'asc'
        The selection order for all of the indices in the query.
        If `None`, omit the `ORDER BY` clause.

    limit: Optional[int], default None
        If specified, limit the number of rows retrieved to this value.

    begin_add_minutes: int, default 0
        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).

    end_add_minutes: int, default 0
        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).

    chunksize: Optional[int], default -1
        The size of dataframe chunks to load into memory.

    as_iterator: bool, default False
        If `True`, return the chunks iterator directly.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `pd.DataFrame` of the pipe's data (the concatenation of all chunks),
    the chunks iterator itself if `as_iterator` is `True`,
    or `None` if the read did not produce any chunks iterator.

    """
    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
    pd = import_pandas()
    is_dask = 'dask' in pd.__name__

    ### When the pipe does not enforce dtypes, no columns or dtypes are resolved
    ### and the caller's `select_columns` are passed through untouched.
    enforce = pipe.enforce
    omitted_columns = omit_columns or []

    cols_types = pipe.get_columns_types(debug=debug) if enforce else {}
    pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if enforce else {}

    remote_pandas_types = {
        col: to_pandas_dtype(get_pd_type_from_db_type(typ))
        for col, typ in cols_types.items()
    }
    remote_dt_cols_types = {
        col: typ
        for col, typ in remote_pandas_types.items()
        if are_dtypes_equal(typ, 'datetime')
    }
    remote_dt_tz_aware_cols_types = {
        col: typ
        for col, typ in remote_dt_cols_types.items()
        if ',' in typ or typ == 'datetime'
    }
    remote_dt_tz_naive_cols_types = {
        col: typ
        for col, typ in remote_dt_cols_types.items()
        if col not in remote_dt_tz_aware_cols_types
    }

    configured_pandas_types = {
        col: to_pandas_dtype(typ)
        for col, typ in pipe_dtypes.items()
    }
    configured_lower_precision_dt_cols_types = {
        col: typ
        for col, typ in pipe_dtypes.items()
        if (
            are_dtypes_equal('datetime', typ)
            and '[' in typ
            and 'ns' not in typ
        )
    }

    if enforce:
        ### Only keep columns which exist on the table and are not omitted.
        requested_columns = select_columns or list(cols_types)
        select_columns = [
            col
            for col in requested_columns
            if col in cols_types and col not in omitted_columns
        ]

    dtypes = {}
    if enforce:
        ### NOTE: Later dictionaries take precedence over earlier ones.
        merged_dtypes = {
            **remote_pandas_types,
            **configured_pandas_types,
            **remote_dt_tz_aware_cols_types,
            **remote_dt_tz_naive_cols_types,
            **configured_lower_precision_dt_cols_types
        }
        dtypes = {
            col: typ
            for col, typ in merged_dtypes.items()
            if (not select_columns or col in select_columns)
            and col not in omitted_columns
        }

    if debug:
        dprint(f"[{self}] `read()` dtypes:")
        mrsm.pprint(dtypes)

    query = self.get_pipe_data_query(
        pipe,
        select_columns=select_columns,
        omit_columns=omit_columns,
        begin=begin,
        end=end,
        params=params,
        order=order,
        limit=limit,
        begin_add_minutes=begin_add_minutes,
        end_add_minutes=end_add_minutes,
        debug=debug,
        **kw
    )

    read_kwargs = {}
    if is_dask:
        read_kwargs['index_col'] = pipe.columns.get('datetime', None)

    ### Always read as an iterator; the chunks are only concatenated when requested.
    chunks = self.read(
        query,
        chunksize=chunksize,
        as_iterator=True,
        coerce_float=False,
        dtype=dtypes,
        debug=debug,
        **read_kwargs
    )

    if as_iterator:
        return chunks

    ### `pd.concat(None)` raises a `TypeError`, so pass a missing result through as `None`.
    if chunks is None:
        return None

    return pd.concat(chunks)

Access a pipe's data from the SQL instance.

Parameters
  • pipe (mrsm.Pipe): The pipe to get data from.
  • select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
  • begin (Union[datetime, str, None], default None): If provided, get rows newer than or equal to this value.
  • end (Union[datetime, str, None], default None): If provided, get rows older than this value (exclusive upper bound).
  • params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See meerschaum.connectors.sql.build_where.
  • order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If None, omit the ORDER BY clause.
  • limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
  • begin_add_minutes (int, default 0): The number of minutes to add to the begin datetime (i.e. DATEADD).
  • end_add_minutes (int, default 0): The number of minutes to add to the end datetime (i.e. DATEADD).
  • chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
  • as_iterator (bool, default False): If True, return the chunks iterator directly.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A pd.DataFrame of the pipe's data.
def get_pipe_docs( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, str, NoneType] = None, end: Union[datetime.datetime, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: str = 'asc', limit: Optional[int] = None, debug: bool = False, **kw: Any) -> List[Dict[str, Any]]:
def get_pipe_docs(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, str, None] = None,
    end: Union[datetime, str, None] = None,
    params: Optional[Dict[str, Any]] = None,
    order: str = 'asc',
    limit: Optional[int] = None,
    debug: bool = False,
    **kw: Any
) -> List[Dict[str, Any]]:
    """
    Fetch a pipe's rows as plain dictionaries without building a dataframe.

    The `SELECT` statement is built by `get_pipe_data_query()` from the named
    arguments and executed directly. Extra keyword arguments in `kw` are
    accepted but not forwarded to the query builder.

    Returns
    -------
    A list with one dictionary per row.
    The list is empty if no query could be built or the execution returned `None`.
    """
    query_kwargs = {
        'pipe': pipe,
        'select_columns': select_columns,
        'omit_columns': omit_columns,
        'begin': begin,
        'end': end,
        'params': params,
        'order': order,
        'limit': limit,
        'debug': debug,
    }
    query = self.get_pipe_data_query(**query_kwargs)

    ### Skip the execution entirely when there is no query to run.
    result = (
        self.exec(query, silent=True, debug=debug)
        if query is not None
        else None
    )
    if result is None:
        return []

    rows = result.mappings().fetchall()
    return list(map(dict, rows))

Return a pipe's data as a list of dictionaries, bypassing pandas overhead.

def get_pipe_data_query( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, int, str, NoneType] = None, end: Union[datetime.datetime, int, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: Optional[str] = 'asc', sort_datetimes: bool = False, limit: Optional[int] = None, begin_add_minutes: int = 0, end_add_minutes: int = 0, replace_nulls: Optional[str] = None, skip_existing_cols_check: bool = False, debug: bool = False, **kw: Any) -> Optional[str]:
1301def get_pipe_data_query(
1302    self,
1303    pipe: mrsm.Pipe,
1304    select_columns: Optional[List[str]] = None,
1305    omit_columns: Optional[List[str]] = None,
1306    begin: Union[datetime, int, str, None] = None,
1307    end: Union[datetime, int, str, None] = None,
1308    params: Optional[Dict[str, Any]] = None,
1309    order: Optional[str] = 'asc',
1310    sort_datetimes: bool = False,
1311    limit: Optional[int] = None,
1312    begin_add_minutes: int = 0,
1313    end_add_minutes: int = 0,
1314    replace_nulls: Optional[str] = None,
1315    skip_existing_cols_check: bool = False,
1316    debug: bool = False,
1317    **kw: Any
1318) -> Union[str, None]:
1319    """
1320    Return the `SELECT` query for retrieving a pipe's data from its instance.
1321
1322    Parameters
1323    ----------
1324    pipe: mrsm.Pipe:
1325        The pipe to get data from.
1326
1327    select_columns: Optional[List[str]], default None
1328        If provided, only select these given columns.
1329        Otherwise select all available columns (i.e. `SELECT *`).
1330
1331    omit_columns: Optional[List[str]], default None
1332        If provided, remove these columns from the selection.
1333
1334    begin: Union[datetime, int, str, None], default None
1335        If provided, get rows newer than or equal to this value.
1336
1337    end: Union[datetime, str, None], default None
1338        If provided, get rows older than or equal to this value.
1339
1340    params: Optional[Dict[str, Any]], default None
1341        Additional parameters to filter by.
1342        See `meerschaum.connectors.sql.build_where`.
1343
1344    order: Optional[str], default None
1345        The selection order for all of the indices in the query.
1346        If `None`, omit the `ORDER BY` clause.
1347
1348    sort_datetimes: bool, default False
1349        Alias for `order='desc'`.
1350
1351    limit: Optional[int], default None
1352        If specified, limit the number of rows retrieved to this value.
1353
1354    begin_add_minutes: int, default 0
1355        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1356
1357    end_add_minutes: int, default 0
1358        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1359
1360    chunksize: Optional[int], default -1
1361        The size of dataframe chunks to load into memory.
1362
1363    replace_nulls: Optional[str], default None
1364        If provided, replace null values with this value.
1365
1366    skip_existing_cols_check: bool, default False
1367        If `True`, do not verify that querying columns are actually on the table.
1368
1369    debug: bool, default False
1370        Verbosity toggle.
1371
1372    Returns
1373    -------
1374    A `SELECT` query to retrieve a pipe's data.
1375    """
1376    from meerschaum.utils.misc import items_str
1377    from meerschaum.utils.sql import sql_item_name, dateadd_str
1378    from meerschaum.utils.dtypes import coerce_timezone
1379    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
1380
1381    dt_col = pipe.columns.get('datetime', None)
1382    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
1383    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
1384    dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
1385    dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
1386    select_columns = (
1387        [col for col in existing_cols]
1388        if not select_columns
1389        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
1390    )
1391    if omit_columns:
1392        select_columns = [col for col in select_columns if col not in omit_columns]
1393
1394    if order is None and sort_datetimes:
1395        order = 'desc'
1396
1397    if begin == '':
1398        begin = pipe.get_sync_time(debug=debug)
1399        backtrack_interval = pipe.get_backtrack_interval(debug=debug)
1400        if begin is not None:
1401            begin -= backtrack_interval
1402
1403    begin, end = pipe.parse_date_bounds(begin, end)
1404    if isinstance(begin, datetime) and dt_typ:
1405        begin = coerce_timezone(begin, strip_utc=('utc' not in dt_typ.lower()))
1406    if isinstance(end, datetime) and dt_typ:
1407        end = coerce_timezone(end, strip_utc=('utc' not in dt_typ.lower()))
1408
1409    cols_names = [
1410        sql_item_name(col, self.flavor, None)
1411        for col in select_columns
1412    ]
1413    select_cols_str = (
1414        'SELECT\n    '
1415        + ',\n    '.join(
1416            [
1417                (
1418                    col_name
1419                    if not replace_nulls
1420                    else f"COALESCE(col_name, '{replace_nulls}') AS {col_name}"
1421                )
1422                for col_name in cols_names
1423            ]
1424        )
1425    ) if cols_names else 'SELECT *'
1426    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1427    query = f"{select_cols_str}\nFROM {pipe_table_name}"
1428    where = ""
1429
1430    ### MariaDB 12.x optimizer bug: an ordered index scan with `LIMIT` over a `RANGE COLUMNS`-
1431    ### partitioned table that must read a non-indexed column returns zero rows (see the
1432    ### `get_sync_time` workaround in CLAUDE.md — same family, MariaDB-only). Wrapping the
1433    ### datetime column in the `ORDER BY` with a no-op `COALESCE(col, col)` forces a filesort
1434    ### over the fetched rows instead of the broken index walk, with identical ordering. Gated
1435    ### on `LIMIT` (the bug's trigger) so unlimited ordered reads keep the index-ordered scan.
1436    mariadb_partition_order_workaround = (
1437        self.flavor == 'mariadb'
1438        and isinstance(limit, int)
1439        and self._should_partition(pipe)
1440    )
1441
1442    if order is not None:
1443        default_order = 'asc'
1444        if order not in ('asc', 'desc'):
1445            warn(f"Ignoring unsupported order '{order}'. Falling back to '{default_order}'.")
1446            order = default_order
1447        order = order.upper()
1448
1449    if not pipe.columns.get('datetime', None):
1450        _dt = pipe.guess_datetime()
1451        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
1452        is_guess = True
1453    else:
1454        _dt = pipe.get_columns('datetime')
1455        dt = sql_item_name(_dt, self.flavor, None)
1456        is_guess = False
1457
1458    quoted_indices = {
1459        key: sql_item_name(val, self.flavor, None)
1460        for key, val in pipe.columns.items()
1461        if val in existing_cols or skip_existing_cols_check
1462    }
1463
1464    if begin is not None or end is not None:
1465        if is_guess:
1466            if _dt is None:
1467                warn(
1468                    f"No datetime could be determined for {pipe}."
1469                    + "\n    Ignoring begin and end...",
1470                    stack=False,
1471                )
1472                begin, end = None, None
1473            else:
1474                warn(
1475                    f"A datetime wasn't specified for {pipe}.\n"
1476                    + f"    Using column \"{_dt}\" for datetime bounds...",
1477                    stack=False,
1478                )
1479
1480    is_dt_bound = False
1481    if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
1482        begin_da = dateadd_str(
1483            flavor=self.flavor,
1484            datepart='minute',
1485            number=begin_add_minutes,
1486            begin=begin,
1487            db_type=dt_db_type,
1488        )
1489        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
1490        is_dt_bound = True
1491
1492    if end is not None and (_dt in existing_cols or skip_existing_cols_check):
1493        if 'int' in str(type(end)).lower() and end == begin:
1494            end += 1
1495        end_da = dateadd_str(
1496            flavor=self.flavor,
1497            datepart='minute',
1498            number=end_add_minutes,
1499            begin=end,
1500            db_type=dt_db_type,
1501        )
1502        where += f"{dt} <  {end_da}"
1503        is_dt_bound = True
1504
1505    if params is not None:
1506        from meerschaum.utils.sql import build_where
1507        valid_params = {
1508            k: v
1509            for k, v in params.items()
1510            if k in existing_cols or skip_existing_cols_check
1511        }
1512        if valid_params:
1513            where += '    ' + build_where(valid_params, self).lstrip().replace(
1514                'WHERE', ('    AND' if is_dt_bound else "    ")
1515            )
1516
1517    if len(where) > 0:
1518        query += "\nWHERE " + where
1519
1520    if order is not None:
1521        ### Sort by indices, starting with datetime.
1522        order_by = ""
1523        if quoted_indices:
1524            order_by += "\nORDER BY "
1525            if _dt and (_dt in existing_cols or skip_existing_cols_check):
1526                dt_order_expr = (
1527                    f"COALESCE({dt}, {dt})"
1528                    if mariadb_partition_order_workaround
1529                    else dt
1530                )
1531                order_by += dt_order_expr + ' ' + order + ','
1532            for key, quoted_col_name in quoted_indices.items():
1533                if dt == quoted_col_name:
1534                    continue
1535                order_by += ' ' + quoted_col_name + ' ' + order + ','
1536            order_by = order_by[:-1]
1537
1538        query += order_by
1539
1540    if isinstance(limit, int):
1541        if self.flavor == 'mssql':
1542            query = f'SELECT TOP {limit}\n' + query[len("SELECT "):]
1543        elif self.flavor == 'oracle':
1544            query = (
1545                f"SELECT * FROM (\n  {query}\n)\n"
1546                + f"WHERE ROWNUM IN ({', '.join([str(i) for i in range(1, limit+1)])})"
1547            )
1548        else:
1549            query += f"\nLIMIT {limit}"
1550
1551    if debug:
1552        to_print = (
1553            []
1554            + ([f"begin='{begin}'"] if begin else [])
1555            + ([f"end='{end}'"] if end else [])
1556            + ([f"params={params}"] if params else [])
1557        )
1558        dprint("Getting pipe data with constraints: " + items_str(to_print, quotes=False))
1559
1560    return query

Return the SELECT query for retrieving a pipe's data from its instance.

Parameters
  • pipe (mrsm.Pipe): The pipe to get data from.
  • select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
  • begin (Union[datetime, int, str, None], default None): If provided, get rows newer than or equal to this value.
  • end (Union[datetime, str, None], default None): If provided, get rows older than this value (the upper bound is exclusive).
  • params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See meerschaum.connectors.sql.build_where.
  • order (Optional[str], default None): The selection order for all of the indices in the query. If None, omit the ORDER BY clause.
  • sort_datetimes (bool, default False): Alias for order='desc'.
  • limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
  • begin_add_minutes (int, default 0): The number of minutes to add to the begin datetime (i.e. DATEADD).
  • end_add_minutes (int, default 0): The number of minutes to add to the end datetime (i.e. DATEADD).
  • chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
  • replace_nulls (Optional[str], default None): If provided, replace null values with this value.
  • skip_existing_cols_check (bool, default False): If True, do not verify that querying columns are actually on the table.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SELECT query to retrieve a pipe's data.
def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> SuccessTuple:
    """
    Insert a row for `pipe` into this instance's `pipes` table.
    The pipe's keys and parameters must already be set on the object.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to register. It must not already have an ID.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success (`bool`) and message (`str`).
    """
    import json
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.sql import json_flavors
    from meerschaum.connectors.sql.tables import get_tables

    ### The `pipes` table is looked up before anything else
    ### (`create` is disabled for temporary pipes).
    pipes_table = get_tables(
        mrsm_instance=self,
        create=(not pipe.temporary),
        debug=debug,
    )['pipes']

    if pipe.id is not None:
        return False, f"{pipe} is already registered."

    ### NOTE: Parameters set on the Pipe object take priority.
    ###       Any failure while reading them falls back to an empty dictionary.
    try:
        params_to_store = pipe.get_parameters(apply_symlinks=False)
    except Exception as e:
        if debug:
            dprint(str(e))
        params_to_store = None
    if params_to_store is None:
        params_to_store = {}

    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    ### Flavors outside of `json_flavors` receive the parameters as a JSON string.
    insert_query = sqlalchemy.insert(pipes_table).values(
        connector_keys=pipe.connector_keys,
        metric_key=pipe.metric_key,
        location_key=pipe.location_key,
        parameters=(
            params_to_store
            if self.flavor in json_flavors
            else json.dumps(params_to_store)
        ),
    )
    if self.exec(insert_query, debug=debug) is None:
        return False, f"Failed to register {pipe}."
    return True, f"Successfully registered {pipe}."

Register a new pipe. A pipe's attributes must be set before registering.

def edit_pipe( self, pipe: meerschaum.Pipe, patch: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 78def edit_pipe(
 79    self,
 80    pipe: mrsm.Pipe,
 81    patch: bool = False,
 82    debug: bool = False,
 83    **kw : Any
 84) -> SuccessTuple:
 85    """
 86    Persist a Pipe's parameters to its database.
 87
 88    Parameters
 89    ----------
 90    pipe: mrsm.Pipe, default None
 91        The pipe to be edited.
 92    patch: bool, default False
 93        If patch is `True`, update the existing parameters by cascading.
 94        Otherwise overwrite the parameters (default).
 95    debug: bool, default False
 96        Verbosity toggle.
 97    """
 98
 99    if pipe.id is None:
100        return False, f"{pipe} is not registered and cannot be edited."
101
102    from meerschaum.utils.packages import attempt_import
103    from meerschaum.utils.sql import json_flavors
104    if not patch:
105        parameters = pipe.__dict__.get('_attributes', {}).get('parameters', {})
106    else:
107        from meerschaum import Pipe
108        from meerschaum.config._patch import apply_patch_to_config
109        original_parameters = Pipe(
110            pipe.connector_keys, pipe.metric_key, pipe.location_key,
111            mrsm_instance=pipe.instance_keys
112        ).get_parameters(apply_symlinks=False)
113        parameters = apply_patch_to_config(
114            original_parameters,
115            pipe._attributes['parameters']
116        )
117
118    ### ensure pipes table exists
119    from meerschaum.connectors.sql.tables import get_tables
120    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
121
122    import json
123    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
124
125    values = {
126        'parameters': (
127            json.dumps(parameters)
128            if self.flavor not in json_flavors
129            else parameters
130        ),
131    }
132    q = sqlalchemy.update(pipes_tbl).values(**values).where(
133        pipes_tbl.c.pipe_id == pipe.id
134    )
135
136    result = self.exec(q, debug=debug)
137    message = (
138        f"Successfully edited {pipe}."
139        if result is not None else f"Failed to edit {pipe}."
140    )
141    return (result is not None), message

Persist a Pipe's parameters to its database.

Parameters
  • pipe (mrsm.Pipe): The pipe to be edited.
  • patch (bool, default False): If patch is True, update the existing parameters by cascading. Otherwise overwrite the parameters (default).
  • debug (bool, default False): Verbosity toggle.
def get_pipe_id(self, pipe: meerschaum.Pipe, debug: bool = False) -> Any:
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Any:
    """
    Look up the `pipe_id` in the `pipes` table which matches a pipe's keys.

    Returns `None` immediately for temporary pipes and when the query yields no value;
    otherwise the ID is returned as an `int`.
    """
    if pipe.temporary:
        return None

    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
    from meerschaum.connectors.sql.tables import get_tables
    pipes_table = get_tables(
        mrsm_instance=self,
        create=(not pipe.temporary),
        debug=debug,
    )['pipes']
    cols = pipes_table.c

    ### A missing location must be matched with `IS NULL` rather than `=`.
    if pipe.location_key is not None:
        location_filter = (cols.location_key == pipe.location_key)
    else:
        location_filter = cols.location_key.is_(None)

    id_query = sqlalchemy.select(cols.pipe_id)
    id_query = id_query.where(cols.connector_keys == pipe.connector_keys)
    id_query = id_query.where(cols.metric_key == pipe.metric_key)
    id_query = id_query.where(location_filter)

    pipe_id = self.value(id_query, debug=debug, silent=pipe.temporary)
    return int(pipe_id) if pipe_id is not None else pipe_id

Get a Pipe's ID from the pipes table.

def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, Any]:
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    Fetch a pipe's row from the `pipes` table as a dictionary.

    An empty dictionary is returned if the pipe has no ID, if no row is found,
    or if the lookup raises. The `parameters` value is always returned as a dictionary
    (decoded from JSON text if needed, or `{}` if decoding fails).
    """
    import json
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    if pipe.id is None:
        return {}

    pipes_table = get_tables(
        mrsm_instance=self,
        create=(not pipe.temporary),
        debug=debug,
    )['pipes']

    try:
        row_query = sqlalchemy.select(pipes_table).where(pipes_table.c.pipe_id == pipe.id)
        if debug:
            dprint(row_query)
        ### DuckDB rows are read through a DataFrame instead of a result mapping.
        if self.flavor == 'duckdb':
            rows = self.read(row_query, debug=debug).to_dict(orient='records')
        else:
            rows = self.exec(row_query, silent=True, debug=debug).mappings().all()
        if not rows:
            return {}
        attributes = dict(rows[0])
    except Exception:
        if debug:
            dprint(traceback.format_exc())
        return {}

    ### Nothing to decode when the driver already returned a dictionary.
    if isinstance(attributes.get('parameters', None), dict):
        return attributes

    ### Otherwise the parameters are stored as text (possibly JSON-encoded twice).
    try:
        decoded = json.loads(attributes['parameters'])
        if isinstance(decoded, str) and decoded[0] == '{':
            decoded = json.loads(decoded)
        attributes['parameters'] = decoded
    except Exception:
        attributes['parameters'] = {}

    return attributes

Get a Pipe's attributes dictionary.

def sync_pipe( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, str, Dict[Any, Any], None]' = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, blocking: bool = True, debug: bool = False, _check_temporary_tables: bool = True, **kw: Any) -> Tuple[bool, str]:
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    chunksize: Optional[int] = -1,
    check_existing: bool = True,
    blocking: bool = True,
    debug: bool = False,
    _check_temporary_tables: bool = True,
    **kw: Any
) -> SuccessTuple:
    """
    Sync a pipe using a database connection.

    New rows are inserted directly into the pipe's target table.
    Rows to be updated (or upserted) are first synced into a temporary table
    and then merged into the target with the queries from `get_update_queries()`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The Meerschaum Pipe instance into which to sync the data.

    df: Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]
        An optional DataFrame or equivalent to sync into the pipe.
        Defaults to `None`, in which case the sync fails with a warning.

    begin: Union[datetime, int, None], default None
        Optionally specify the earliest datetime to search for data.
        Defaults to `None`.
        NOTE: This value is accepted but not referenced in this function's body.

    end: Union[datetime, int, None], default None
        Optionally specify the latest datetime to search for data.
        Defaults to `None`.
        NOTE: This value is accepted but not referenced in this function's body.

    chunksize: Optional[int], default -1
        Specify the number of rows to sync per chunk.
        If `-1`, resort to system configuration (default is `900`).
        A `chunksize` of `None` will sync all rows in one transaction.
        Defaults to `-1`.

    check_existing: bool, default True
        If `True`, pull and diff with existing data from the pipe. Defaults to `True`.
        Forced to `False` when the target table does not yet exist or when upserting.

    blocking: bool, default True
        If `True`, wait for sync to finish and return its result, otherwise asynchronously sync.
        Defaults to `True`.
        NOTE: This value is accepted but not referenced in this function's body.

    debug: bool, default False
        Verbosity toggle. Defaults to False.

    _check_temporary_tables: bool, default True
        If `True`, call `_drop_old_temporary_tables()` after a successful sync.

    kw: Any
        Catch-all for keyword arguments.
        These are forwarded to `pipe.filter_existing()` and `self.to_sql()`
        (`if_exists` and `name` are removed first).

    Returns
    -------
    A `SuccessTuple` of success (`bool`) and message (`str`).
    """
    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.sql import (
        get_update_queries,
        sql_item_name,
        UPDATE_QUERIES,
        get_reset_autoincrement_queries,
    )
    from meerschaum.utils.dtypes import get_current_timestamp
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
    from meerschaum.utils.dataframe import get_special_cols
    from meerschaum import Pipe
    import time
    import copy
    pd = import_pandas()
    if df is None:
        msg = f"DataFrame is None. Cannot sync {pipe}."
        warn(msg)
        return False, msg

    start = time.perf_counter()
    pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
    dtypes = pipe.get_dtypes(debug=debug)

    if not pipe.temporary and not pipe.id:
        register_tuple = pipe.register(debug=debug)
        if not register_tuple[0]:
            return register_tuple

    ### df is the dataframe returned from the remote source
    ### via the connector
    if debug:
        dprint("Fetched data:\n" + str(df))

    if not isinstance(df, pd.DataFrame):
        df = pipe.enforce_dtypes(
            df,
            chunksize=chunksize,
            safe_copy=kw.get('safe_copy', False),
            dtypes=dtypes,
            debug=debug,
        )

    ### if table does not exist, create it with indices
    is_new = False
    if not pipe.exists(debug=debug):
        check_existing = False
        is_new = True
    else:
        ### Check for new columns.
        add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
        if add_cols_queries:
            pipe._clear_cache_key('_columns_types', debug=debug)
            pipe._clear_cache_key('_columns_indices', debug=debug)
            if not self.exec_queries(add_cols_queries, debug=debug):
                warn(f"Failed to add new columns to {pipe}.")

        alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
        if alter_cols_queries:
            ### Clear both caches, mirroring the add-columns branch above
            ### (previously `_columns_types` was cleared twice).
            pipe._clear_cache_key('_columns_types', debug=debug)
            pipe._clear_cache_key('_columns_indices', debug=debug)
            if not self.exec_queries(alter_cols_queries, debug=debug):
                warn(f"Failed to alter columns for {pipe}.")

    ### Upserts are only used when the flavor has a dedicated `-upsert` query template.
    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
    if upsert:
        check_existing = False
    kw['safe_copy'] = kw.get('safe_copy', False)

    unseen_df, update_df, delta_df = (
        pipe.filter_existing(
            df,
            chunksize=chunksize,
            debug=debug,
            **kw
        ) if check_existing else (df, None, df)
    )
    ### When upserting, every row goes through the temporary-table merge below.
    if upsert:
        unseen_df, update_df, delta_df = (df.head(0), df, df)

    if debug:
        dprint("Delta data:\n" + str(delta_df))
        dprint("Unseen data:\n" + str(unseen_df))
        if update_df is not None:
            dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df))

    ### `if_exists` and `name` are set explicitly below, so remove them from the catch-all.
    if_exists = kw.pop('if_exists', 'append')
    kw.pop('name', None)

    ### Insert new data into the target table.
    unseen_kw = copy.deepcopy(kw)
    unseen_kw.update({
        'name': pipe.target,
        'if_exists': if_exists,
        'debug': debug,
        'as_dict': True,
        'safe_copy': kw.get('safe_copy', False),
        'chunksize': chunksize,
        'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
        'schema': self.get_pipe_schema(pipe),
    })

    dt_col = pipe.columns.get('datetime', None)
    primary_key = pipe.columns.get('primary', None)
    ### Infer autoincrement for a new table whose primary key
    ### is neither typed in `dtypes` nor present in the incoming data.
    autoincrement = (
        pipe.parameters.get('autoincrement', False)
        or (
            is_new
            and primary_key
            and primary_key not in dtypes
            and primary_key not in unseen_df.columns
        )
    )
    ### Only persist the flag when it is not already set in the pipe's parameters.
    ### (Previously this tested whether the flag's *value* was a key of `pipe.parameters`,
    ### which triggered a parameters update on every sync.)
    if autoincrement and not pipe.parameters.get('autoincrement', False):
        update_success, update_msg = pipe.update_parameters(
            {'autoincrement': autoincrement},
            debug=debug,
        )
        if not update_success:
            return update_success, update_msg

    def _check_pk(_df_to_clear):
        """Drop the primary key column in-place if it contains only nulls."""
        if _df_to_clear is None:
            return
        if primary_key not in _df_to_clear.columns:
            return
        if not _df_to_clear[primary_key].notnull().any():
            del _df_to_clear[primary_key]

    ### Explicit primary key values were provided for an auto-incrementing column,
    ### so the sequence must be reset after the insert.
    autoincrement_needs_reset = bool(
        autoincrement
        and primary_key
        and primary_key in unseen_df.columns
        and unseen_df[primary_key].notnull().any()
    )
    if autoincrement and primary_key:
        for _df_to_clear in (unseen_df, update_df, delta_df):
            _check_pk(_df_to_clear)

    if is_new:
        create_success, create_msg = self.create_pipe_table_from_df(
            pipe,
            unseen_df,
            debug=debug,
        )
        if not create_success:
            return create_success, create_msg

    ### Pre-create native range partitions (non-TimescaleDB) so the rows about to be written
    ### land in an existing partition. No-op for non-partitioned pipes.
    if self._should_partition(pipe):
        for _part_df in (unseen_df, update_df):
            if _part_df is not None and len(_part_df) > 0:
                part_success, part_msg = self._create_missing_partitions(
                    pipe, _part_df, debug=debug,
                )
                if not part_success:
                    return part_success, part_msg

    ### MSSQL requires `IDENTITY_INSERT` to be enabled to write explicit identity values.
    do_identity_insert = bool(
        self.flavor in ('mssql',)
        and primary_key
        and primary_key in unseen_df.columns
        and autoincrement
    )
    stats = {'success': True, 'msg': ''}
    if len(unseen_df) > 0:
        ### Toggle `IDENTITY_INSERT` and insert on the same connection and transaction.
        with self.engine.connect() as connection:
            with connection.begin():
                if do_identity_insert:
                    identity_on_result = self.exec(
                        f"SET IDENTITY_INSERT {pipe_name} ON",
                        commit=False,
                        _connection=connection,
                        close=False,
                        debug=debug,
                    )
                    if identity_on_result is None:
                        return False, f"Could not enable identity inserts on {pipe}."

                stats = self.to_sql(
                    unseen_df,
                    _connection=connection,
                    **unseen_kw
                )

                if do_identity_insert:
                    identity_off_result = self.exec(
                        f"SET IDENTITY_INSERT {pipe_name} OFF",
                        commit=False,
                        _connection=connection,
                        close=False,
                        debug=debug,
                    )
                    if identity_off_result is None:
                        return False, f"Could not disable identity inserts on {pipe}."

    if is_new:
        if not self.create_indices(pipe, debug=debug):
            warn(f"Failed to create indices for {pipe}. Continuing...")

    if autoincrement_needs_reset:
        reset_autoincrement_queries = get_reset_autoincrement_queries(
            pipe.target,
            primary_key,
            self,
            schema=self.get_pipe_schema(pipe),
            debug=debug,
        )
        results = self.exec_queries(reset_autoincrement_queries, debug=debug)
        for result in results:
            if result is None:
                warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)

    if update_df is not None and len(update_df) > 0:
        ### Stage the update rows in a temporary table, then merge them into the target.
        temp_target = self.get_temporary_target(
            pipe.target,
            label=('update' if not upsert else 'upsert'),
        )
        self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
        update_dtypes = {
            **{
                col: str(typ)
                for col, typ in update_df.dtypes.items()
            },
            **get_special_cols(update_df)
        }

        temp_pipe = Pipe(
            pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
            instance=pipe.instance_keys,
            columns={
                (ix_key if ix_key != 'primary' else 'primary_'): ix
                for ix_key, ix in pipe.columns.items()
                if ix and ix in update_df.columns
            },
            dtypes=update_dtypes,
            target=temp_target,
            temporary=True,
            enforce=False,
            static=True,
            autoincrement=False,
            cache=False,
            parameters={
                'schema': self.internal_schema,
                'hypertable': False,
            },
        )
        ### Seed the temporary pipe's in-memory cache with the column types derived
        ### from `update_df`.
        _temp_columns_types = {
            col: get_db_type_from_pd_type(typ, self.flavor)
            for col, typ in update_dtypes.items()
        }
        temp_pipe._cache_value('_columns_types', _temp_columns_types, memory_only=True, debug=debug)
        temp_pipe._cache_value('_skip_check_indices', True, memory_only=True, debug=debug)
        now_ts = get_current_timestamp('ms', as_int=True) / 1000
        temp_pipe._cache_value('_columns_types_timestamp', now_ts, memory_only=True, debug=debug)
        temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
        if not temp_success:
            return temp_success, temp_msg

        existing_cols = pipe.get_columns_types(debug=debug)
        ### A partitioned table (TimescaleDB hypertable or a native range-partitioned table on
        ### PostgreSQL/MySQL/MSSQL) folds the datetime column into its composite primary key, so the
        ### upsert conflict target must include it too — `ON CONFLICT (primary_key)` alone has no
        ### matching unique constraint. Non-partitioned tables keep the historical primary-key-only
        ### semantics ("the primary key is the identity, regardless of datetime").
        partition_upsert = bool(
            dt_col
            and dt_col in update_df.columns
            and (
                self.flavor in ('timescaledb', 'timescaledb-ha')
                or self._should_partition(pipe)
            )
        )
        ### Without a primary key (or on Oracle), join on every existing index column.
        join_cols = [
            col
            for col_key, col in pipe.columns.items()
            if col and col in existing_cols
        ] if not primary_key or self.flavor == 'oracle' else (
            [dt_col, primary_key]
            if partition_upsert
            else [primary_key]
        )
        update_queries = get_update_queries(
            pipe.target,
            temp_target,
            self,
            join_cols,
            upsert=upsert,
            schema=self.get_pipe_schema(pipe),
            patch_schema=self.internal_schema,
            target_cols_types=pipe.get_columns_types(debug=debug),
            patch_cols_types=_temp_columns_types,
            datetime_col=(dt_col if dt_col in update_df.columns else None),
            identity_insert=(autoincrement and primary_key in update_df.columns),
            null_indices=pipe.null_indices,
            cast_columns=pipe.enforce,
            debug=debug,
        )
        update_results = self.exec_queries(
            update_queries,
            break_on_error=True,
            rollback=True,
            debug=debug,
        )
        update_success = all(update_results)
        ### Mark the temporary table as ready to drop whether or not the update succeeded.
        self._log_temporary_tables_creation(
            temp_target,
            ready_to_drop=True,
            create=(not pipe.temporary),
            debug=debug,
        )
        if not update_success:
            warn(f"Failed to apply update to {pipe}.")
        stats['success'] = stats['success'] and update_success
        stats['msg'] = (
            (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
            if not update_success
            else stats.get('msg', '')
        )

    stop = time.perf_counter()
    success = stats['success']
    if not success:
        return success, stats['msg'] or str(stats)

    unseen_count = len(unseen_df.index) if unseen_df is not None else 0
    update_count = len(update_df.index) if update_df is not None else 0
    msg = (
        (
            f"Inserted {unseen_count:,}, "
            + f"updated {update_count:,} rows."
        )
        if not upsert
        else (
            f"Upserted {update_count:,} row"
            + ('s' if update_count != 1 else '')
            + "."
        )
    )
    if debug:
        ### Replace the trailing period with the table name and elapsed time.
        msg = msg[:-1] + (
            f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n"
            + f"in {round(stop - start, 2)} seconds."
        )

    if _check_temporary_tables:
        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
            refresh=False, debug=debug
        )
        if not drop_stale_success:
            warn(drop_stale_msg)

    return success, msg

Sync a pipe using a database connection.

Parameters
  • pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
  • df (Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]): An optional DataFrame or equivalent to sync into the pipe. Defaults to None.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe. Defaults to True.
  • blocking (bool, default True): If True, wait for sync to finish and return its result, otherwise asynchronously sync. Defaults to True.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
  • kw (Any): Catch-all for keyword arguments.
Returns
  • A SuccessTuple of success (bool) and message (str).
def sync_pipe_inplace( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def sync_pipe_inplace(
    self,
    pipe: 'mrsm.Pipe',
    params: Optional[Dict[str, Any]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    chunksize: Optional[int] = -1,
    check_existing: bool = True,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    If a pipe's connector is the same as its instance connector,
    it's more efficient to sync the pipe in-place rather than reading data into Pandas.

    The sync is performed with a series of temporary tables
    (`new`, `backtrack`, `delta`, `joined`, `unseen`, `update`) created in the
    connector's internal schema. When the pipe is an upsert pipe (and the flavor
    has an upsert query), only the `update` table is built and applied.

    For the `duckdb` flavor, this method delegates to `pipe.sync(..., _inplace=False)`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose connector is the same as its instance.

    params: Optional[Dict[str, Any]], default None
        Optional params dictionary to build the `WHERE` clause.
        See `meerschaum.utils.sql.build_where`.

    begin: Union[datetime, int, None], default None
        Optionally specify the earliest datetime to search for data.
        Defaults to `None`.

    end: Union[datetime, int, None], default None
        Optionally specify the latest datetime to search for data.
        Defaults to `None`.

    chunksize: Optional[int], default -1
        Specify the number of rows to sync per chunk.
        If `-1`, resort to system configuration (default is `900`).
        A `chunksize` of `None` will sync all rows in one transaction.
        Defaults to `-1`.
        Within this method it is only forwarded to `pipe.sync()` for DuckDB.

    check_existing: bool, default True
        If `True`, pull and diff with existing data from the pipe.

    debug: bool, default False
        Verbosity toggle.

    kw: Any
        Catch-all for keyword arguments (only forwarded to `pipe.sync()` for DuckDB).

    Returns
    -------
    A SuccessTuple of success (`bool`) and message (`str`).

    Raises
    ------
    ValueError
        If no index columns are shared between the new data and the existing table.
    """
    if self.flavor == 'duckdb':
        return pipe.sync(
            params=params,
            begin=begin,
            end=end,
            chunksize=chunksize,
            check_existing=check_existing,
            debug=debug,
            _inplace=False,
            **kw
        )
    from meerschaum.utils.sql import (
        sql_item_name,
        get_update_queries,
        get_null_replacement,
        get_create_table_queries,
        get_create_schema_if_not_exists_queries,
        get_table_cols_types,
        session_execute,
        dateadd_str,
        UPDATE_QUERIES,
    )
    from meerschaum.utils.dtypes.sql import (
        get_pd_type_from_db_type,
        get_db_type_from_pd_type,
    )
    from meerschaum.utils.misc import generate_password

    transaction_id_length = (
        mrsm.get_config(
            'system', 'connectors', 'sql', 'instance', 'temporary_target', 'transaction_id_length'
        )
    )
    transact_id = generate_password(transaction_id_length)

    internal_schema = self.internal_schema
    target = pipe.target
    temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
    ### Raw (unquoted) temporary table names, keyed by their role in the sync.
    temp_tables = {
        table_root: self.get_temporary_target(target, transact_id=transact_id, label=table_root)
        for table_root in temp_table_roots
    }
    ### Quoted, schema-qualified names for use in queries.
    temp_table_names = {
        table_root: sql_item_name(table_name_raw, self.flavor, internal_schema)
        for table_root, table_name_raw in temp_tables.items()
    }
    temp_table_aliases = {
        table_root: sql_item_name(table_root, self.flavor)
        for table_root in temp_table_roots
    }
    table_alias_as = " AS" if self.flavor != 'oracle' else ''
    metadef = self.get_pipe_metadef(
        pipe,
        params=params,
        begin=begin,
        end=end,
        check_existing=check_existing,
        debug=debug,
    )
    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
    ### Only upsert if the pipe requests it and this flavor has an upsert query.
    upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in UPDATE_QUERIES
    static = pipe.parameters.get('static', False)
    database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
    primary_key = pipe.columns.get('primary', None)
    primary_key_typ = pipe.dtypes.get(primary_key, None) if primary_key else None
    primary_key_db_type = (
        get_db_type_from_pd_type(primary_key_typ, self.flavor)
        if primary_key_typ
        else None
    )
    if not {col_key: col for col_key, col in pipe.columns.items() if col_key and col}:
        return False, "Cannot sync in-place without index columns."

    autoincrement = pipe.parameters.get('autoincrement', False)
    dt_col = pipe.columns.get('datetime', None)
    dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
    dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None

    def clean_up_temp_tables(ready_to_drop: bool = False):
        """
        Log this sync's temporary tables (only the `update` table when upserting)
        and drop stale temporary tables. Failures are emitted as warnings.
        """
        log_success, log_msg = self._log_temporary_tables_creation(
            [
                table
                for table in temp_tables.values()
            ] if not upsert else [temp_tables['update']],
            ready_to_drop=ready_to_drop,
            create=(not pipe.temporary),
            debug=debug,
        )
        if not log_success:
            warn(log_msg)
        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
            refresh=False,
            debug=debug,
        )
        if not drop_stale_success:
            warn(drop_stale_msg)
        return drop_stale_success, drop_stale_msg

    sqlalchemy, sqlalchemy_orm = mrsm.attempt_import(
        'sqlalchemy',
        'sqlalchemy.orm',
    )

    ### If the target table does not exist, create it directly from the definition.
    if not pipe.exists(debug=debug):
        schema = self.get_pipe_schema(pipe)
        create_pipe_queries = get_create_table_queries(
            metadef,
            pipe.target,
            self.flavor,
            schema=schema,
            primary_key=primary_key,
            primary_key_db_type=primary_key_db_type,
            autoincrement=autoincrement,
            datetime_column=dt_col,
        )
        if schema:
            create_pipe_queries = (
                get_create_schema_if_not_exists_queries(schema, self.flavor)
                + create_pipe_queries
            )

        results = self.exec_queries(create_pipe_queries, debug=debug)
        if not all(results):
            _ = clean_up_temp_tables()
            return False, f"Could not insert new data into {pipe} from its SQL query definition."

        if not self.create_indices(pipe, debug=debug):
            warn(f"Failed to create indices for {pipe}. Continuing...")

        rowcount = pipe.get_rowcount(debug=debug)
        _ = clean_up_temp_tables()
        return True, f"Inserted {rowcount:,}, updated 0 rows."

    session = sqlalchemy_orm.Session(self.engine)
    ### NOTE: DuckDB returned early above, so `connectable` is the session here.
    connectable = session if self.flavor != 'duckdb' else self

    ### Materialize the definition into the `new` table (or `update` when upserting).
    create_new_query = get_create_table_queries(
        metadef,
        temp_tables[('new') if not upsert else 'update'],
        self.flavor,
        schema=internal_schema,
    )[0]
    (create_new_success, create_new_msg), create_new_results = session_execute(
        session,
        create_new_query,
        with_results=True,
        debug=debug,
    )
    if not create_new_success:
        _ = clean_up_temp_tables()
        return create_new_success, create_new_msg
    new_count = create_new_results[0].rowcount if create_new_results else 0

    new_cols_types = get_table_cols_types(
        temp_tables[('new' if not upsert else 'update')],
        connectable=connectable,
        flavor=self.flavor,
        schema=internal_schema,
        database=database,
        debug=debug,
    ) if not static else pipe.get_columns_types(debug=debug)
    if not new_cols_types:
        ### The temporary table was already created, so log it for later removal
        ### (consistent with the other failure paths).
        _ = clean_up_temp_tables()
        return False, f"Failed to get new columns for {pipe}."

    new_cols = {
        str(col_name): get_pd_type_from_db_type(str(col_type))
        for col_name, col_type in new_cols_types.items()
    }
    new_cols_str = '\n    ' + ',\n    '.join([
        sql_item_name(col, self.flavor)
        for col in new_cols
    ])

    def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
        """
        Return the database type for `col` from `cols_types`,
        preferring the `new` table's type for Oracle `CHAR` columns.
        """
        if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
            return new_cols_types[col]
        return cols_types[col]

    ### Bring the target table's schema in line with the incoming columns.
    add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
    if add_cols_queries:
        pipe._clear_cache_key('_columns_types', debug=debug)
        pipe._clear_cache_key('_columns_indices', debug=debug)
        self.exec_queries(add_cols_queries, debug=debug)

    alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
    if alter_cols_queries:
        pipe._clear_cache_key('_columns_types', debug=debug)
        self.exec_queries(alter_cols_queries, debug=debug)

    ### Without diffing (and without upsert), insert the new rows directly.
    insert_queries = [
        (
            f"INSERT INTO {pipe_name} ({new_cols_str})\n"
            f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}{table_alias_as}"
            f" {temp_table_aliases['new']}"
        )
    ] if not check_existing and not upsert else []

    new_queries = insert_queries
    new_success, new_msg = (
        session_execute(session, new_queries, debug=debug)
        if new_queries
        else (True, "Success")
    )
    if not new_success:
        _ = clean_up_temp_tables()
        return new_success, new_msg

    if not check_existing:
        session.commit()
        _ = clean_up_temp_tables()
        return True, f"Inserted {new_count}, updated 0 rows."

    min_dt_col_name_da = dateadd_str(
        flavor=self.flavor, begin=f"MIN({dt_col_name})", db_type=dt_db_type,
    )
    max_dt_col_name_da = dateadd_str(
        flavor=self.flavor, begin=f"MAX({dt_col_name})", db_type=dt_db_type,
    )

    ### Determine the datetime bounds of the new data to limit the backtrack query.
    (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
        session,
        [
            "SELECT\n"
            f"    {min_dt_col_name_da} AS {sql_item_name('min_dt', self.flavor)},\n"
            f"    {max_dt_col_name_da} AS {sql_item_name('max_dt', self.flavor)}\n"
            f"FROM {temp_table_names['new' if not upsert else 'update']}\n"
            f"WHERE {dt_col_name} IS NOT NULL"
        ],
        with_results=True,
        debug=debug,
    ) if dt_col and not upsert else ((True, "Success"), None)
    if not new_dt_bounds_success:
        ### Log the already-created temporary table before bailing out.
        _ = clean_up_temp_tables()
        return (
            new_dt_bounds_success,
            f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
        )

    if dt_col and not upsert:
        begin, end = new_dt_bounds_results[0].fetchone()

    backtrack_def = self.get_pipe_data_query(
        pipe,
        begin=begin,
        end=end,
        begin_add_minutes=0,
        end_add_minutes=1,
        params=params,
        debug=debug,
        order=None,
    )
    create_backtrack_query = get_create_table_queries(
        backtrack_def,
        temp_tables['backtrack'],
        self.flavor,
        schema=internal_schema,
    )[0]
    (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute(
        session,
        create_backtrack_query,
        with_results=True,
        debug=debug,
    ) if not upsert else ((True, "Success"), None)

    if not create_backtrack_success:
        _ = clean_up_temp_tables()
        return create_backtrack_success, create_backtrack_msg

    backtrack_cols_types = get_table_cols_types(
        temp_tables['backtrack'],
        connectable=connectable,
        flavor=self.flavor,
        schema=internal_schema,
        database=database,
        debug=debug,
    ) if not (upsert or static) else new_cols_types

    common_cols = [col for col in new_cols if col in backtrack_cols_types]
    ### NOTE(review): `primary_key` was already read above; the re-read is kept as-is.
    primary_key = pipe.columns.get('primary', None)
    ### Join on the primary key if one is set, otherwise on all shared index columns
    ### (excluding the `value` column).
    on_cols = {
        col: new_cols.get(col)
        for col_key, col in pipe.columns.items()
        if (
            col
            and
            col_key != 'value'
            and col in backtrack_cols_types
            and col in new_cols
        )
    } if not primary_key else {primary_key: new_cols.get(primary_key)}
    if not on_cols:
        ### Log the already-created temporary tables before raising.
        _ = clean_up_temp_tables()
        raise ValueError("Cannot sync without common index columns.")

    null_replace_new_cols_str = (
        '\n    ' + ',\n    '.join([
            f"COALESCE({temp_table_aliases['new']}.{sql_item_name(col, self.flavor)}, "
            + get_null_replacement(get_col_typ(col, new_cols_types), self.flavor)
            + ") AS "
            + sql_item_name(col, self.flavor, None)
            for col in new_cols
        ])
    )

    ### The delta is every new row without an identical match (on all common columns,
    ### with NULLs coalesced to sentinel values) in the backtrack table.
    select_delta_query = (
        "SELECT"
        + null_replace_new_cols_str
        + f"\nFROM {temp_table_names['new']}{table_alias_as} {temp_table_aliases['new']}\n"
        + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as} {temp_table_aliases['backtrack']}"
        + "\n    ON\n    "
        + '\n    AND\n    '.join([
            (
                f"    COALESCE({temp_table_aliases['new']}."
                + sql_item_name(c, self.flavor, None)
                + ", "
                + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor)
                + ")"
                + '\n        =\n    '
                + f"    COALESCE({temp_table_aliases['backtrack']}."
                + sql_item_name(c, self.flavor, None)
                + ", "
                + get_null_replacement(get_col_typ(c, backtrack_cols_types), self.flavor)
                + ") "
            ) for c in common_cols
        ])
        + "\nWHERE\n    "
        + '\n    AND\n    '.join([
            (
                f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor) + ' IS NULL'
            ) for c in common_cols
        ])
    )
    create_delta_query = get_create_table_queries(
        select_delta_query,
        temp_tables['delta'],
        self.flavor,
        schema=internal_schema,
    )[0]
    create_delta_success, create_delta_msg = session_execute(
        session,
        create_delta_query,
        debug=debug,
    ) if not upsert else (True, "Success")
    if not create_delta_success:
        _ = clean_up_temp_tables()
        return create_delta_success, create_delta_msg

    delta_cols_types = get_table_cols_types(
        temp_tables['delta'],
        connectable=connectable,
        flavor=self.flavor,
        schema=internal_schema,
        database=database,
        debug=debug,
    ) if not (upsert or static) else new_cols_types

    ### This is a weird bug on SQLite.
    ### Sometimes the backtrack dtypes are all empty strings.
    if not all(delta_cols_types.values()):
        delta_cols_types = new_cols_types

    delta_cols = {
        col: get_pd_type_from_db_type(typ)
        for col, typ in delta_cols_types.items()
    }
    delta_cols_str = ', '.join([
        sql_item_name(col, self.flavor)
        for col in delta_cols
    ])

    ### Join the delta back onto the backtrack data (on the index columns only)
    ### to distinguish unseen rows from updates.
    select_joined_query = (
        "SELECT\n    "
        + (',\n    '.join([
            (
                f"{temp_table_aliases['delta']}." + sql_item_name(c, self.flavor, None)
                + " AS " + sql_item_name(c + '_delta', self.flavor, None)
            ) for c in delta_cols
        ]))
        + ",\n    "
        + (',\n    '.join([
            (
                f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor, None)
                + " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
            ) for c in backtrack_cols_types
        ]))
        + f"\nFROM {temp_table_names['delta']}{table_alias_as} {temp_table_aliases['delta']}\n"
        + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as}"
        + f" {temp_table_aliases['backtrack']}"
        + "\n    ON\n    "
        + '\n    AND\n    '.join([
            (
                f"    COALESCE({temp_table_aliases['delta']}." + sql_item_name(c, self.flavor)
                + ", "
                + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
                + '\n        =\n    '
                + f"    COALESCE({temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor)
                + ", "
                + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
            ) for c in on_cols
        ])
    )

    create_joined_query = get_create_table_queries(
        select_joined_query,
        temp_tables['joined'],
        self.flavor,
        schema=internal_schema,
    )[0]
    create_joined_success, create_joined_msg = session_execute(
        session,
        create_joined_query,
        debug=debug,
    ) if on_cols and not upsert else (True, "Success")
    if not create_joined_success:
        _ = clean_up_temp_tables()
        return create_joined_success, create_joined_msg

    ### Unseen rows: joined rows with no backtrack match.
    ### The NULL sentinel values are converted back into NULLs.
    select_unseen_query = (
        "SELECT\n    "
        + (',\n    '.join([
            (
                "CASE\n        WHEN " + sql_item_name(c + '_delta', self.flavor, None)
                + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
                + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
                + "\n        ELSE NULL\n    END"
                + " AS " + sql_item_name(c, self.flavor, None)
            ) for c in delta_cols
        ]))
        + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
        + "WHERE\n    "
        + '\n    AND\n    '.join([
            (
                sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
            ) for c in delta_cols
        ])
    )
    create_unseen_query = get_create_table_queries(
        select_unseen_query,
        temp_tables['unseen'],
        self.flavor,
        internal_schema,
    )[0]
    (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
        session,
        create_unseen_query,
        with_results=True,
        debug=debug
    ) if not upsert else ((True, "Success"), None)
    if not create_unseen_success:
        _ = clean_up_temp_tables()
        return create_unseen_success, create_unseen_msg

    ### Update rows: joined rows with any non-NULL backtrack column.
    select_update_query = (
        "SELECT\n    "
        + (',\n    '.join([
            (
                "CASE\n        WHEN " + sql_item_name(c + '_delta', self.flavor, None)
                + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
                + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
                + "\n        ELSE NULL\n    END"
                + " AS " + sql_item_name(c, self.flavor, None)
            ) for c in delta_cols
        ]))
        + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
        + "WHERE\n    "
        + '\n    OR\n    '.join([
            (
                sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
            ) for c in delta_cols
        ])
    )

    create_update_query = get_create_table_queries(
        select_update_query,
        temp_tables['update'],
        self.flavor,
        internal_schema,
    )[0]
    (create_update_success, create_update_msg), create_update_results = session_execute(
        session,
        create_update_query,
        with_results=True,
        debug=debug,
    ) if on_cols and not upsert else ((True, "Success"), [])
    ### Do not apply updates from a table which failed to build.
    if not create_update_success:
        _ = clean_up_temp_tables()
        return create_update_success, create_update_msg

    apply_update_queries = (
        get_update_queries(
            pipe.target,
            temp_tables['update'],
            session,
            on_cols,
            upsert=upsert,
            schema=self.get_pipe_schema(pipe),
            patch_schema=internal_schema,
            target_cols_types=pipe.get_columns_types(debug=debug),
            patch_cols_types=delta_cols_types,
            datetime_col=pipe.columns.get('datetime', None),
            flavor=self.flavor,
            null_indices=pipe.null_indices,
            cast_columns=pipe.enforce,
            debug=debug,
        )
        if on_cols else []
    )

    apply_unseen_queries = [
        (
            f"INSERT INTO {pipe_name} ({delta_cols_str})\n"
            + f"SELECT {delta_cols_str}\nFROM "
            + (
                temp_table_names['unseen']
                if on_cols
                else temp_table_names['delta']
            )
        ),
    ]

    (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute(
        session,
        apply_unseen_queries,
        with_results=True,
        debug=debug,
    ) if not upsert else ((True, "Success"), None)
    if not apply_unseen_success:
        _ = clean_up_temp_tables()
        return apply_unseen_success, apply_unseen_msg
    unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0

    (apply_update_success, apply_update_msg), apply_update_results = session_execute(
        session,
        apply_update_queries,
        with_results=True,
        debug=debug,
    )
    if not apply_update_success:
        _ = clean_up_temp_tables()
        return apply_update_success, apply_update_msg
    update_count = apply_update_results[0].rowcount if apply_update_results else 0

    session.commit()

    msg = (
        f"Inserted {unseen_count:,}, updated {update_count:,} rows."
        if not upsert
        else f"Upserted {update_count:,} row" + ('s' if update_count != 1 else '') + "."
    )
    ### The sync succeeded, so the temporary tables may now be dropped.
    _ = clean_up_temp_tables(ready_to_drop=True)

    return True, msg

If a pipe's connector is the same as its instance connector, it's more efficient to sync the pipe in-place rather than reading data into Pandas.

Parameters
  • pipe (mrsm.Pipe): The pipe whose connector is the same as its instance.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple.
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, remote: bool = False, debug: bool = False) -> Union[datetime.datetime, int, NoneType]:
def get_sync_time(
    self,
    pipe: 'mrsm.Pipe',
    params: Optional[Dict[str, Any]] = None,
    newest: bool = True,
    remote: bool = False,
    debug: bool = False,
) -> Union[datetime, int, None]:
    """
    Return the newest (or oldest) value along a pipe's datetime axis.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose sync time should be fetched.

    params: Optional[Dict[str, Any]], default None
        Optional params dictionary to build the `WHERE` clause.
        Only keys which are existing columns are honored.
        See `meerschaum.utils.sql.build_where`.

    newest: bool, default True
        If `True`, return the most recent value (`DESC` / `MAX`).
        If `False`, return the oldest value (`ASC` / `MIN`).

    remote: bool, default False
        If `True`, query the pipe's fetch definition rather than its table.
        Only supported when the pipe's connector is a SQL connector.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `datetime` (or an `int` for an integer axis), or `None` if the pipe has no
    datetime column, has no existing columns (when not `remote`), no value was found,
    or the query failed (in which case a warning is emitted).
    """
    from meerschaum.utils.sql import sql_item_name, build_where, wrap_query_with_cte

    src_name = sql_item_name('src', self.flavor)
    table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    dt_col = pipe.columns.get('datetime', None)
    if dt_col is None:
        return None
    dt_col_name = sql_item_name(dt_col, self.flavor, None)

    if remote and pipe.connector.type != 'sql':
        warn(f"Cannot get the remote sync time for {pipe}.")
        return None

    sort_direction = "DESC" if newest else "ASC"
    existing_cols = pipe.get_columns_types(debug=debug)
    if not (remote or existing_cols):
        return None

    ### Only filter on columns which actually exist.
    valid_params = (
        {}
        if params is None
        else {key: val for key, val in params.items() if key in existing_cols}
    )
    flavor = pipe.connector.flavor if remote else self.flavor

    ### If no bounds are provided for the datetime column,
    ### add IS NOT NULL to the WHERE clause.
    valid_params.setdefault(dt_col, '_None')
    where = build_where(valid_params, self) if valid_params else ""

    if remote:
        src_query = self.get_pipe_metadef(pipe, params=params, begin=None, end=None)
    else:
        src_query = f"SELECT {dt_col_name}\nFROM {table_name}{where}"

    ### Choose the dialect-specific way to take only the first ordered row.
    if self.flavor == 'mssql':
        base_query = (
            f"SELECT TOP 1 {dt_col_name}\n"
            f"FROM {src_name}\n"
            f"ORDER BY {dt_col_name} {sort_direction}"
        )
    elif self.flavor == 'oracle':
        base_query = (
            "SELECT * FROM (\n"
            f"    SELECT {dt_col_name}\n"
            f"    FROM {src_name}\n"
            f"    ORDER BY {dt_col_name} {sort_direction}\n"
            ") WHERE ROWNUM = 1"
        )
    else:
        base_query = (
            f"SELECT {dt_col_name}\n"
            f"FROM {src_name}\n"
            f"ORDER BY {dt_col_name} {sort_direction}\n"
            f"LIMIT 1"
        )

    ### NOTE: MariaDB has an optimizer bug where `ORDER BY <dt> DESC/ASC LIMIT 1` against a
    ### `RANGE COLUMNS` partitioned table combined with a `WHERE` clause performs a partition
    ### index scan that stops early and returns zero rows (observed on MariaDB 12.x). The
    ### equivalent `MIN`/`MAX` aggregate scans the pruned partitions correctly, so use it for
    ### the bounds on partitioned MariaDB tables instead.
    if self.flavor == 'mariadb' and not remote and self._should_partition(pipe):
        agg_func = "MAX" if newest else "MIN"
        base_query = (
            f"SELECT {agg_func}({dt_col_name}) AS {dt_col_name}\n"
            f"FROM {src_name}"
        )

    query = wrap_query_with_cte(src_query, base_query, flavor)

    try:
        db_time = self.value(query, silent=True, debug=debug)

        ### No datetime could be found.
        if db_time is None:
            return None

        if isinstance(db_time, str):
            ### sqlite returns str.
            sync_time = mrsm.attempt_import('dateutil.parser').parse(db_time)
        elif isinstance(db_time, datetime):
            ### Unwrap datetime subclasses which expose `to_pydatetime()`;
            ### plain datetimes pass through untouched.
            sync_time = (
                db_time.to_pydatetime()
                if hasattr(db_time, 'to_pydatetime')
                else db_time
            )
        elif isinstance(db_time, date):
            ### Sometimes the datetime is actually a date.
            sync_time = datetime.combine(db_time, datetime.min.time())
        elif 'int' in str(type(db_time)).lower():
            ### Support for an integer datetime axis.
            sync_time = int(db_time)
        else:
            ### Convert pandas timestamp to Python datetime.
            sync_time = db_time.to_pydatetime()

    except Exception as e:
        sync_time = None
        warn(str(e))

    return sync_time

Get a Pipe's most recent datetime value.

Parameters
  • pipe (mrsm.Pipe): The pipe to get the sync time for.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.
  • newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
  • remote (bool, default False): If True, return the sync time for the remote fetch definition.
Returns
  • A datetime object (or int if using an integer axis) if the pipe exists, otherwise None.
def pipe_exists(self, pipe: meerschaum.Pipe, debug: bool = False) -> bool:
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> bool:
    """
    Determine whether the target table for a pipe is present.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table should be looked up.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    `True` if the pipe's target table exists in the pipe's schema, otherwise `False`.

    """
    from meerschaum.utils.sql import table_exists

    target = pipe.target
    schema = self.get_pipe_schema(pipe)
    exists = table_exists(target, self, schema=schema, debug=debug)
    if debug:
        status = 'exists.' if exists else 'does not exist.'
        dprint(f"{pipe} " + status)
    return exists

Check that a Pipe's table exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to check.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A bool corresponding to whether a pipe's table exists.
def get_pipe_rowcount( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, remote: bool = False, debug: bool = False) -> Optional[int]:
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    remote: bool = False,
    debug: bool = False
) -> Union[int, None]:
    """
    Get the rowcount for a pipe in accordance with given parameters.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to query with.

    begin: Union[datetime, int, None], default None
        The begin datetime value (inclusive, compared with `>=`).

    end: Union[datetime, int, None], default None
        The end datetime value (exclusive, compared with `<`).

    params: Optional[Dict[str, Any]], default None
        See `meerschaum.utils.sql.build_where`.
        Keys which are not among the pipe's existing columns
        are left out of the `WHERE` clause.

    remote: bool, default False
        If `True`, get the rowcount for the remote table
        (the pipe's `fetch:definition` queried through `pipe.connector`).

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    An `int` for the number of rows if the `pipe` exists, otherwise `None`.
    `None` is also returned if the query's result cannot be cast to an `int`.

    """
    from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte, build_where
    from meerschaum.connectors.sql._fetch import get_pipe_query
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
    if remote:
        msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
        if 'fetch' not in pipe.parameters:
            error(msg)
            return None
        if 'definition' not in pipe.parameters['fetch']:
            error(msg)
            return None
    elif not pipe.exists(debug=debug):
        return None

    ### Remote counts are executed with the pipe's own connector (and its flavor).
    flavor = self.flavor if not remote else pipe.connector.flavor
    conn = self if not remote else pipe.connector
    _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
    dt_col = pipe.columns.get('datetime', None)
    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
    dt_db_type = get_db_type_from_pd_type(dt_typ, flavor) if dt_typ else None
    if not dt_col:
        dt_col = pipe.guess_datetime()
        dt_name = sql_item_name(dt_col, flavor, None) if dt_col else None
        is_guess = True
    else:
        dt_col = pipe.get_columns('datetime')
        dt_name = sql_item_name(dt_col, flavor, None)
        is_guess = False

    if begin is not None or end is not None:
        if is_guess:
            if dt_col is None:
                warn(
                    f"No datetime could be determined for {pipe}."
                    + "\n    Ignoring begin and end...",
                    stack=False,
                )
                begin, end = None, None
            else:
                warn(
                    f"A datetime wasn't specified for {pipe}.\n"
                    + f"    Using column \"{dt_col}\" for datetime bounds...",
                    stack=False,
                )

    ### Only select the columns referenced by the bounds and `params`.
    ### NOTE(review): every key of `params` is selected here, while the WHERE clause
    ### below only uses keys found in the existing columns — confirm this is intended.
    _cols_names = [
        sql_item_name(col, flavor)
        for col in set(
            (
                [dt_col]
                if dt_col
                else []
            ) + (
                []
                if params is None
                else list(params.keys())
            )
        )
    ]
    if not _cols_names:
        _cols_names = ['*']

    src = (
        f"SELECT {', '.join(_cols_names)}\nFROM {_pipe_name}"
        if not remote
        else get_pipe_query(pipe)
    )
    parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', flavor)}"
    query = wrap_query_with_cte(src, parent_query, flavor)

    ### Evaluated after the warning block because the bounds may have been reset to `None`.
    has_bounds = begin is not None or end is not None
    if has_bounds:
        query += "\nWHERE"
    if begin is not None:
        query += (
            f"\n    {dt_name} >= "
            + dateadd_str(flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
        )
    if end is not None and begin is not None:
        query += "\n    AND"
    if end is not None:
        query += (
            f"\n    {dt_name} <  "
            + dateadd_str(flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
        )
    if params is not None:
        existing_cols = pipe.get_columns_types(debug=debug)
        valid_params = {k: v for k, v in params.items() if k in existing_cols}
        if valid_params:
            ### Swap only the first 'WHERE' (assumed to be the keyword which opens the clause
            ### from `build_where`); replacing every occurrence would also rewrite
            ### the text 'WHERE' inside parameter values.
            where_keyword = 'AND' if has_bounds else 'WHERE'
            query += build_where(valid_params, conn).replace('WHERE', where_keyword, 1)

    result = conn.value(query, debug=debug, silent=True)
    try:
        return int(result)
    except Exception:
        return None

Get the rowcount for a pipe in accordance with given parameters.

Parameters
  • pipe (mrsm.Pipe): The pipe to query with.
  • begin (Union[datetime, int, None], default None): The begin datetime value.
  • end (Union[datetime, int, None], default None): The end datetime value.
  • params (Optional[Dict[str, Any]], default None): See meerschaum.utils.sql.build_where.
  • remote (bool, default False): If True, get the rowcount for the remote table.
  • debug (bool, default False): Verbosity toggle.
Returns
  • An int for the number of rows if the pipe exists, otherwise None.
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kw) -> Tuple[bool, str]:
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    Drop the target table of a pipe while leaving its registration untouched.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to drop.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS

    table = pipe.target
    table_schema = self.get_pipe_schema(pipe)
    qualified_name = sql_item_name(table, self.flavor, table_schema)

    ### A table which is already absent counts as a successful drop.
    dropped = True
    if table_exists(table, self, schema=table_schema, debug=debug):
        drop_clause = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
        drop_result = self.exec(
            f"DROP TABLE {drop_clause} {qualified_name}", silent=True, debug=debug
        )
        dropped = drop_result is not None

    if not dropped:
        return dropped, f"Failed to drop {pipe}."

    ### Remove the MSSQL partition scheme + function the table referenced (no-op otherwise).
    partition_cleanup = self._get_partition_cleanup_queries(pipe)
    if partition_cleanup:
        self.exec_queries(partition_cleanup, break_on_error=False, silent=True, debug=debug)

    return dropped, "Success"

Drop a pipe's tables but maintain its registration.

Parameters
  • pipe (mrsm.Pipe): The pipe to drop.
Returns
  • A SuccessTuple indicating success.
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    Remove rows from a pipe's table (optionally restricted to an interval and / or
    a `params` filter) while keeping the table itself.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to clear.

    begin: Union[datetime, int, None], default None
        Beginning datetime. Inclusive.

    end: Union[datetime, int, None], default None
        Ending datetime. Exclusive.

    params: Optional[Dict[str, Any]], default None
        See `meerschaum.utils.sql.build_where`.
        Keys which are not existing columns are ignored.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    A pipe which does not exist is reported as a success (nothing to clear).
    """
    if not pipe.exists(debug=debug):
        return True, f"{pipe} does not exist, so nothing was cleared."

    from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
    table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    dt_col = pipe.columns.get('datetime', None)
    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
    dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None

    ### Without a configured datetime column, fall back to the pipe's best guess.
    is_guess = not dt_col
    if is_guess:
        dt_col = pipe.guess_datetime()
        dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
    else:
        dt_col = pipe.get_columns('datetime')
        dt_name = sql_item_name(dt_col, self.flavor, None)

    bounds_given = begin is not None or end is not None
    if bounds_given and is_guess:
        if dt_col is None:
            warn(
                f"No datetime could be determined for {pipe}."
                + "\n    Ignoring datetime bounds...",
                stack=False,
            )
            begin, end = None, None
        else:
            warn(
                f"A datetime wasn't specified for {pipe}.\n"
                + f"    Using column \"{dt_col}\" for datetime bounds...",
                stack=False,
            )

    valid_params = {}
    if params is not None:
        existing_cols = pipe.get_columns_types(debug=debug)
        valid_params = {
            key: val
            for key, val in params.items()
            if key in existing_cols
        }

    ### `WHERE 1 = 1` lets every optional condition be appended uniformly with `AND`.
    query_parts = [f"DELETE FROM {table_name}\nWHERE 1 = 1\n"]
    if valid_params:
        query_parts.append(
            '\n    AND ' + build_where(valid_params, self, with_where=False)
        )
    if begin is not None:
        query_parts.append(
            f'\n    AND {dt_name} >= '
            + dateadd_str(self.flavor, 'day', 0, begin, db_type=dt_db_type)
        )
    if end is not None:
        query_parts.append(
            f'\n    AND {dt_name} <  '
            + dateadd_str(self.flavor, 'day', 0, end, db_type=dt_db_type)
        )
    clear_query = ''.join(query_parts)

    cleared = self.exec(clear_query, silent=True, debug=debug) is not None
    if cleared:
        return cleared, "Success"
    return cleared, f"Failed to clear {pipe}."

Delete a pipe's data within a bounded or unbounded interval without dropping the table.

Parameters
  • pipe (mrsm.Pipe): The pipe to clear.
  • begin (Union[datetime, int, None], default None): Beginning datetime. Inclusive.
  • end (Union[datetime, int, None], default None): Ending datetime. Exclusive.
  • params (Optional[Dict[str, Any]], default None): See meerschaum.utils.sql.build_where.
def deduplicate_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
def deduplicate_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Delete duplicate values within a pipe's table.

    A deduplicated copy of the table is created (keeping the first row per
    combination of index columns), the original table is renamed aside,
    the copy is renamed into place, and the old table is dropped.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose table to deduplicate.

    begin: Union[datetime, int, None], default None
        If provided, only deduplicate values greater than or equal to this value.
        NOTE: this implementation does not reference `begin`;
        the entire table is deduplicated.

    end: Union[datetime, int, None], default None
        If provided, only deduplicate values less than this value.
        NOTE: this implementation does not reference `end`.

    params: Optional[Dict[str, Any]], default None
        If provided, further limit deduplication to values which match this query dictionary.
        NOTE: this implementation does not reference `params`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    from meerschaum.utils.sql import (
        sql_item_name,
        get_rename_table_queries,
        DROP_IF_EXISTS_FLAVORS,
        get_create_table_query,
        format_cte_subquery,
        get_null_replacement,
    )
    from meerschaum.utils.misc import generate_password, flatten_list

    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    if not pipe.exists(debug=debug):
        return False, f"Table {pipe_table_name} does not exist."

    dt_col = pipe.columns.get('datetime', None)
    ### Fetch the table's columns once; the mapping is reused for the index check
    ### and for the column list.
    existing_cols = pipe.get_columns_types(debug=debug)

    get_rowcount_query = f"SELECT COUNT(*) FROM {pipe_table_name}"
    old_rowcount = self.value(get_rowcount_query, debug=debug)
    if old_rowcount is None:
        return False, f"Failed to get rowcount for table {pipe_table_name}."

    ### Non-datetime indices that in fact exist.
    indices = [
        col
        for col in pipe.columns.values()
        if col and col != dt_col and col in existing_cols
    ]
    indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
    existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
    duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
    previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)

    index_list_str = (
        sql_item_name(dt_col, self.flavor, None)
        if dt_col
        else ''
    )
    index_list_str_ordered = (
        (
            sql_item_name(dt_col, self.flavor, None) + " DESC"
        )
        if dt_col
        else ''
    )
    if indices:
        index_list_str += ', ' + ', '.join(indices_names)
        index_list_str_ordered += ', ' + ', '.join(indices_names)
    ### Without a datetime column, the lists begin with a separator which must be removed.
    if index_list_str.startswith(','):
        index_list_str = index_list_str.lstrip(',').lstrip()
    if index_list_str_ordered.startswith(','):
        index_list_str_ordered = index_list_str_ordered.lstrip(',').lstrip()

    cols_list_str = ', '.join(existing_cols_names)

    try:
        ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()).
        is_old_mysql = (
            self.flavor in ('mysql', 'mariadb')
            and
            int(self.db_version.split('.')[0]) < 8
        )
    except Exception:
        is_old_mysql = False

    src_query = f"""
        SELECT
            {cols_list_str},
            ROW_NUMBER() OVER (
                PARTITION BY
                {index_list_str}
                ORDER BY {index_list_str_ordered}
            ) AS {duplicate_row_number_name}
        FROM {pipe_table_name}
    """
    duplicates_cte_subquery = format_cte_subquery(
        src_query,
        self.flavor,
        sub_name = 'src',
        cols_to_select = cols_list_str,
    ) + f"""
        WHERE {duplicate_row_number_name} = 1
        """
    ### Emulate ROW_NUMBER() with session variables for old MySQL / MariaDB versions.
    old_mysql_query = (
        f"""
        SELECT
            {index_list_str}
        FROM (
          SELECT
            {index_list_str},
            IF(
                @{previous_row_number_name} <> {index_list_str.replace(', ', ' + ')},
                @{duplicate_row_number_name} := 0,
                @{duplicate_row_number_name}
            ),
            @{previous_row_number_name} := {index_list_str.replace(', ', ' + ')},
            @{duplicate_row_number_name} := @{duplicate_row_number_name} + 1 AS """
        + f"""{duplicate_row_number_name}
          FROM
            {pipe_table_name},
            (
                SELECT @{duplicate_row_number_name} := 0
            ) AS {duplicate_row_number_name},
            (
                SELECT @{previous_row_number_name} := '{get_null_replacement('str', 'mysql')}'
            ) AS {previous_row_number_name}
          ORDER BY {index_list_str_ordered}
        ) AS t
        WHERE {duplicate_row_number_name} = 1
        """
    )
    if is_old_mysql:
        duplicates_cte_subquery = old_mysql_query

    ### The same session ID ties the two temporary targets together.
    session_id = generate_password(3)

    dedup_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='dedup')
    temp_old_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='old')
    temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))

    create_temporary_table_query = get_create_table_query(
        duplicates_cte_subquery,
        dedup_table,
        self.flavor,
    ) + f"""
    ORDER BY {index_list_str_ordered}
    """
    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
    ### Swap the tables: current -> old, dedup -> current, then drop old.
    alter_queries = flatten_list([
        get_rename_table_queries(
            pipe.target,
            temp_old_table,
            self.flavor,
            schema=self.get_pipe_schema(pipe),
        ),
        get_rename_table_queries(
            dedup_table,
            pipe.target,
            self.flavor,
            schema=None,
            new_schema=self.get_pipe_schema(pipe),
        ),
        f"DROP TABLE {if_exists_str} {temp_old_table_name}",
    ])

    self._log_temporary_tables_creation(temp_old_table, create=(not pipe.temporary), debug=debug)
    create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
    if create_temporary_result is None:
        return False, f"Failed to deduplicate table {pipe_table_name}."

    results = self.exec_queries(
        alter_queries,
        break_on_error=True,
        rollback=True,
        debug=debug,
    )

    ### Report the first query which did not produce a result.
    fail_query = None
    for result, query in zip(results, alter_queries):
        if result is None:
            fail_query = query
            break
    success = fail_query is None

    new_rowcount = (
        self.value(get_rowcount_query, debug=debug)
        if success
        else None
    )

    ### The new rowcount may be `None` if the count query fails;
    ### formatting `None` with `:,` would raise a `TypeError`, so omit the counts then.
    rowcount_changed = new_rowcount is not None and old_rowcount != new_rowcount

    msg = (
        (
            f"Successfully deduplicated table {pipe_table_name}"
            + (
                f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
                if rowcount_changed
                else ''
            ) + '.'
        )
        if success
        else f"Failed to execute query:\n{fail_query}"
    )
    return success, msg

Delete duplicate values within a pipe's table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose table to deduplicate.
  • begin (Union[datetime, int, None], default None): If provided, only deduplicate values greater than or equal to this value.
  • end (Union[datetime, int, None], default None): If provided, only deduplicate values less than this value.
  • params (Optional[Dict[str, Any]], default None): If provided, further limit deduplication to values which match this query dictionary.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success.
def get_pipe_table( self, pipe: meerschaum.Pipe, debug: bool = False) -> "Union['sqlalchemy.Table', None]":
def get_pipe_table(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Union['sqlalchemy.Table', None]:
    """
    Build the `sqlalchemy.Table` object for a pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe in question.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `sqlalchemy.Table` object, or `None` if the pipe does not exist.
    """
    from meerschaum.utils.sql import get_sqlalchemy_table

    if pipe.exists(debug=debug):
        ### `refresh=True` is always passed for this lookup.
        table_kwargs = {
            'connector': self,
            'schema': self.get_pipe_schema(pipe),
            'debug': debug,
            'refresh': True,
        }
        return get_sqlalchemy_table(pipe.target, **table_kwargs)

    return None

Return the sqlalchemy.Table object for a mrsm.Pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe in question.
Returns
  • A sqlalchemy.Table object.
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, str]:
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, str]:
    """
    Get the pipe's columns and types.

    For most flavors the columns are read with `get_table_cols_types()`.
    For `oracle`, `mysql`, `mariadb`, `sqlite`, and `geopackage`,
    they are read from the pipe's `sqlalchemy.Table` instead.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to get the columns for.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary of columns names (`str`) and types (`str`).
    An empty dictionary is returned if the pipe does not exist
    or if reading the table's columns fails.

    Examples
    --------
    >>> conn.get_pipe_columns_types(pipe)
    {
      'dt': 'TIMESTAMP WITHOUT TIMEZONE',
      'id': 'BIGINT',
      'val': 'DOUBLE PRECISION',
    }
    >>> 
    """
    from meerschaum.utils.sql import get_table_cols_types
    if not pipe.exists(debug=debug):
        return {}

    if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite', 'geopackage'):
        return get_table_cols_types(
            pipe.target,
            self,
            flavor=self.flavor,
            schema=self.get_pipe_schema(pipe),
            debug=debug,
        )

    if debug:
        dprint(f"Fetching columns_types for {pipe} with via SQLAlchemy table.")

    table_columns = {}
    try:
        pipe_table = self.get_pipe_table(pipe, debug=debug)
        if pipe_table is None:
            return {}

        if debug:
            dprint("Found columns:")
            mrsm.pprint(dict(pipe_table.columns))

        for col in pipe_table.columns:
            table_columns[str(col.name)] = str(col.type)
    except Exception as e:
        ### Import locally so the error path cannot fail on a missing name.
        import traceback
        traceback.print_exc()
        warn(e)
        ### Discard any partially collected columns.
        table_columns = {}

    return table_columns

Get the pipe's columns and types.

Parameters
  • pipe (mrsm.Pipe): The pipe to get the columns for.
Returns
  • A dictionary of columns names (str) and types (str).
Examples
>>> conn.get_pipe_columns_types(pipe)
{
  'dt': 'TIMESTAMP WITHOUT TIMEZONE',
  'id': 'BIGINT',
  'val': 'DOUBLE PRECISION',
}
>>>
def get_to_sql_dtype( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'", update_dtypes: bool = True) -> "Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']":
def get_to_sql_dtype(
    self,
    pipe: 'mrsm.Pipe',
    df: 'pd.DataFrame',
    update_dtypes: bool = True,
) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
    """
    Build the `dtype` dictionary to hand to `to_sql()` for a pipe and DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe which may contain a `dtypes` parameter.

    df: pd.DataFrame
        The DataFrame to be pushed via `to_sql()`.

    update_dtypes: bool, default True
        If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.

    Returns
    -------
    A dictionary with `sqlalchemy` datatypes.
    Entries with a falsy column name or type are omitted.

    Examples
    --------
    >>> import pandas as pd
    >>> import meerschaum as mrsm
    >>> 
    >>> conn = mrsm.get_connector('sql:memory')
    >>> df = pd.DataFrame([{'a': {'b': 1}}])
    >>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
    >>> conn.get_to_sql_dtype(pipe, df)
    {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
    """
    from meerschaum.utils.dataframe import get_special_cols
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

    ### Layer the types: DataFrame dtypes, then special columns, then the pipe's dtypes.
    pd_types = {}
    for col, typ in df.dtypes.items():
        pd_types[col] = str(typ)
    pd_types.update(get_special_cols(df))
    if update_dtypes:
        pd_types.update(pipe.dtypes)

    sqlalchemy_types = {}
    for col, typ in pd_types.items():
        if not (col and typ):
            continue
        sqlalchemy_types[col] = get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
    return sqlalchemy_types

Given a pipe and DataFrame, return the dtype dictionary for to_sql().

Parameters
  • pipe (mrsm.Pipe): The pipe which may contain a dtypes parameter.
  • df (pd.DataFrame): The DataFrame to be pushed via to_sql().
  • update_dtypes (bool, default True): If True, patch the pipe's dtypes onto the DataFrame's dtypes.
Returns
  • A dictionary with sqlalchemy datatypes.
Examples
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> 
>>> conn = mrsm.get_connector('sql:memory')
>>> df = pd.DataFrame([{'a': {'b': 1}}])
>>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
>>> conn.get_to_sql_dtype(pipe, df)
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
def get_pipe_schema(self, pipe: meerschaum.Pipe) -> Optional[str]:
def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
    """
    Resolve which schema applies to a pipe.

    For the `sqlite` and `geopackage` flavors, the connector's own `schema`
    is always used. Otherwise `pipe.parameters['schema']` takes precedence,
    falling back to `self.schema` when the key is absent.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe which may contain a configured schema.

    Returns
    -------
    A schema string or `None` if nothing is configured.
    """
    uses_connector_schema_only = self.flavor in ('sqlite', 'geopackage')
    if not uses_connector_schema_only:
        return pipe.parameters.get('schema', self.schema)
    return self.schema

Return the schema to use for this pipe. First check pipe.parameters['schema'], then check self.schema.

Parameters
  • pipe (mrsm.Pipe): The pipe which may contain a configured schema.
Returns
  • A schema string or None if nothing is configured.
def create_pipe_table_from_df( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'", debug: bool = False) -> Tuple[bool, str]:
def create_pipe_table_from_df(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame',
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Create a pipe's table from its configured dtypes and an incoming dataframe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table should be created.

    df: pd.DataFrame
        The incoming dataframe whose dtypes seed the table's columns.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating whether the table was created.
    """
    from meerschaum.utils.dataframe import get_special_cols
    from meerschaum.utils.sql import (
        get_create_table_queries,
        sql_item_name,
        get_create_schema_if_not_exists_queries,
    )
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

    ### GeoPackage pipes are initialized up front; bail out if that fails.
    if self.flavor == 'geopackage':
        gpkg_success, gpkg_msg = self._init_geopackage_pipe(df, pipe, debug=debug)
        if not gpkg_success:
            return gpkg_success, gpkg_msg

    pk_col = pipe.columns.get('primary', None)
    pk_typ = None
    pk_db_type = None
    if pk_col:
        pk_typ = pipe.dtypes.get(pk_col, str(df.dtypes.get(pk_col, 'int')))
        pk_db_type = get_db_type_from_pd_type(pk_typ, self.flavor)

    dt_col = pipe.columns.get('datetime', None)

    ### Later layers override earlier ones:
    ### dataframe dtypes < index columns < special columns < the pipe's configured dtypes.
    table_dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
    table_dtypes.update({
        col: str(df.dtypes.get(col, 'int'))
        for col_ix, col in pipe.columns.items()
        if col and col_ix != 'primary'
    })
    table_dtypes.update(get_special_cols(df))
    table_dtypes.update(pipe.dtypes)

    ### A primary key which has no dtype of its own is treated as autoincrementing.
    autoincrement = (
        pipe.parameters.get('autoincrement', False)
        or (pk_col and pk_col not in table_dtypes)
    )
    if autoincrement:
        table_dtypes.pop(pk_col, None)

    schema = self.get_pipe_schema(pipe)

    ### TimescaleDB 2.21+ can declare the hypertable inline via
    ### `CREATE TABLE ... WITH (tsdb.hypertable, ...)`. If that statement fails, a plain
    ### `CREATE TABLE` is used instead (see the retry below) and `create_hypertable()`
    ### is left to `get_create_index_queries`.
    is_timescale = self.flavor in ('timescaledb', 'timescaledb-ha')
    hypertable = (
        is_timescale
        and pipe.parameters.get('hypertable', True)
        and dt_col is not None
    )

    ### The declarative path is taken only while Hypercore is enabled (the default),
    ### because it turns on the columnstore (`segmentby` / `orderby`) and has TimescaleDB
    ### install a columnstore policy. With `hypercore=False`, the plain table plus the later
    ### `create_hypertable()` call adds no such policy (a plain row-store hypertable).
    hypercore = hypertable and pipe.parameters.get('hypercore', True)
    hypertable_chunk_interval = None
    hypertable_segmentby = None
    hypertable_orderby = None
    if hypercore:
        chunk_interval = pipe.get_chunk_interval(debug=debug)
        if isinstance(chunk_interval, int):
            hypertable_chunk_interval = f'{chunk_interval}'
        else:
            hypertable_chunk_interval = f'{int(chunk_interval.total_seconds() / 60)} minutes'
        compress_settings = self._get_compress_settings(pipe)
        hypertable_segmentby = compress_settings['segmentby'] or None
        hypertable_orderby = compress_settings['orderby'] or None

    ### Native range partitioning (non-TimescaleDB flavors); a no-op column for others.
    partition_by_column = self._get_partition_column(pipe)

    ### MySQL / MariaDB need the initial partitions inline in `CREATE TABLE`
    ### (an empty RANGE-partitioned table is invalid), so derive them from the creation df.
    needs_inline_partitions = (
        partition_by_column is not None
        and self.flavor in ('mysql', 'mariadb')
    )
    partition_bounds = None
    if needs_inline_partitions:
        partition_bounds = self._get_initial_partition_bounds(pipe, df, debug=debug)
        ### No datetime values in the df means no inline partition: use a plain table.
        if not partition_bounds:
            partition_by_column = None

    ### MSSQL partitions through a function + scheme created before the table;
    ### the clustered index is placed on the scheme (`partition_scheme_name`).
    partition_scheme_name = None
    partition_creation_queries = []
    if partition_by_column is not None and self.flavor == 'mssql':
        partition_scheme_name = self._partition_scheme_name(pipe)
        partition_creation_queries = self._get_mssql_partition_creation_queries(
            pipe, df, debug=debug
        )

    def _build_create_table_queries(_hypertable_chunk_interval):
        ### The hypertable options only apply when a chunk interval is passed.
        segmentby = hypertable_segmentby if _hypertable_chunk_interval else None
        orderby = hypertable_orderby if _hypertable_chunk_interval else None
        table_queries = get_create_table_queries(
            table_dtypes,
            pipe.target,
            self.flavor,
            schema=schema,
            primary_key=pk_col,
            primary_key_db_type=pk_db_type,
            datetime_column=dt_col,
            hypertable_chunk_interval=_hypertable_chunk_interval,
            hypertable_segmentby=segmentby,
            hypertable_orderby=orderby,
            partition_by_column=partition_by_column,
            partition_bounds=partition_bounds,
            partition_scheme_name=partition_scheme_name,
        )
        if partition_creation_queries:
            table_queries = partition_creation_queries + table_queries
        if schema:
            schema_queries = get_create_schema_if_not_exists_queries(schema, self.flavor)
            table_queries = schema_queries + table_queries
        return table_queries

    create_table_queries = _build_create_table_queries(hypertable_chunk_interval)
    create_results = self.exec_queries(
        create_table_queries,
        break_on_error=True,
        rollback=True,
        silent=hypercore,
        debug=debug,
    )
    success = all(create_results)
    if not success and hypercore:
        ### Declarative hypertable syntax unsupported; retry as a plain table.
        ### `create_hypertable()` runs later during index creation.
        create_table_queries = _build_create_table_queries(None)
        retry_results = self.exec_queries(
            create_table_queries,
            break_on_error=True,
            rollback=True,
            debug=debug,
        )
        success = all(retry_results)

    target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
    if success and self.flavor == 'geopackage':
        return self._init_geopackage_pipe(df, pipe, debug=debug)

    if success:
        return success, "Success"
    return success, f"Failed to create {target_name}."

Create a pipe's table from its configured dtypes and an incoming dataframe.

def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[Dict[str, str]]]:
def get_pipe_columns_indices(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, List[Dict[str, str]]]:
    """
    Look up which indices exist on each column of a pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table is inspected.

    debug: bool, default False
        Passed through to `get_table_cols_indices()`.

    Returns
    -------
    A dictionary mapping column names to lists of dictionaries
    (each describing the name and type of an index),
    or an empty dictionary when the pipe's `_skip_check_indices` attribute is truthy.
    """
    ### Pipes flagged with `_skip_check_indices` never hit the database.
    skip_check = pipe.__dict__.get('_skip_check_indices', False)
    if skip_check:
        return {}

    from meerschaum.utils.sql import get_table_cols_indices

    cols_indices = get_table_cols_indices(
        pipe.target,
        self,
        flavor=self.flavor,
        schema=self.get_pipe_schema(pipe),
        debug=debug,
    )
    return cols_indices

Return a dictionary mapping columns to the indices created on those columns.

Parameters
  • pipe (mrsm.Pipe): The pipe to be queried against.
Returns
  • A dictionary mapping column names to lists of dictionaries.
  • The dictionaries in the lists contain the name and type of the indices.
@staticmethod
def get_temporary_target( target: str, transact_id: Optional[str] = None, label: Optional[str] = None, separator: Optional[str] = None) -> str:
@staticmethod
def get_temporary_target(
    target: str,
    transact_id: Optional[str] = None,
    label: Optional[str] = None,
    separator: Optional[str] = None,
) -> str:
    """
    Build a unique(ish) temporary target name for a pipe.

    The result is `<prefix><target><separator><transact_id>`,
    followed by `<separator><label>` when `label` is truthy.
    The prefix, the separator, and the length of a generated transaction ID
    fall back to the `system:connectors:sql:instance:temporary_target` configuration.

    Parameters
    ----------
    target: str
        The base target name.

    transact_id: Optional[str], default None
        The transaction ID to embed. If falsy, one is generated with `generate_password()`.

    label: Optional[str], default None
        An optional suffix appended after the transaction ID.

    separator: Optional[str], default None
        The separator between the name's parts. If falsy, the configured value is used.

    Returns
    -------
    The temporary target name.
    """
    from meerschaum.utils.misc import generate_password

    config = (
        mrsm.get_config('system', 'connectors', 'sql', 'instance', 'temporary_target') or {}
    )
    id_length = config.get('transaction_id_length', 3)
    if not transact_id:
        transact_id = generate_password(id_length)
    prefix = config.get('prefix', '_')
    if not separator:
        separator = config.get('separator', '_')

    label_suffix = (separator + label) if label else ''
    return prefix + target + separator + transact_id + label_suffix

Return a unique(ish) temporary target for a pipe.

def create_pipe_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, debug: bool = False) -> Tuple[bool, str]:
def create_pipe_indices(
    self,
    pipe: mrsm.Pipe,
    columns: Optional[List[str]] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Create a pipe's indices and report the outcome as a success tuple.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices should be created.

    columns: Optional[List[str]], default None
        Passed through to `self.create_indices()`.

    debug: bool, default False
        Passed through to `self.create_indices()`.

    Returns
    -------
    A tuple of the result of `self.create_indices()` and a message.
    """
    created = self.create_indices(pipe, columns=columns, debug=debug)
    if created:
        return created, "Success"
    return created, f"Failed to create indices for {pipe}."

Create a pipe's indices.

def drop_pipe_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, debug: bool = False) -> Tuple[bool, str]:
def drop_pipe_indices(
    self,
    pipe: mrsm.Pipe,
    columns: Optional[List[str]] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Drop a pipe's indices and report the outcome as a success tuple.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices should be dropped.

    columns: Optional[List[str]], default None
        Passed through to `self.drop_indices()`.

    debug: bool, default False
        Passed through to `self.drop_indices()`.

    Returns
    -------
    A tuple of the result of `self.drop_indices()` and a message.
    """
    dropped = self.drop_indices(pipe, columns=columns, debug=debug)
    if dropped:
        return dropped, "Success"
    return dropped, f"Failed to drop indices for {pipe}."

Drop a pipe's indices.

def get_pipe_index_names(self, pipe: meerschaum.Pipe) -> Dict[str, str]:
def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]:
    """
    Return a dictionary mapping index keys to their names on the database.

    Names are rendered from the pipe's `index_template` parameter
    (default `IX_{schema_str}{target}_{column_names}`) and truncated for this flavor.
    When several index keys render to the same name, only the first key is kept.

    Returns
    -------
    A dictionary of index keys to index names.
    """
    from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS, truncate_item_name

    index_template = pipe.parameters.get(
        'index_template',
        "IX_{schema_str}{target}_{column_names}",
    )

    pipe_schema = self.get_pipe_schema(pipe)
    if pipe_schema is None:
        pipe_schema = (
            None
            if self.flavor == 'mssql'
            else DEFAULT_SCHEMA_FLAVORS.get(self.flavor, None)
        )
    schema_str = f'{pipe_schema}_' if pipe_schema is not None else ''
    ### NOTE(review): the schema prefix computed above is unconditionally overridden,
    ### so `{schema_str}` always renders as an empty string. Confirm this is intended.
    schema_str = ''

    indices = pipe.indices
    target = pipe.target

    ### Flatten each index's column(s) into a single underscore-joined string.
    joined_columns = {}
    for ix, cols in indices.items():
        if not cols:
            continue
        if isinstance(cols, (list, tuple)):
            joined_columns[ix] = '_'.join(cols)
        else:
            joined_columns[ix] = str(cols)

    rendered_names = {}
    for ix, column_names in joined_columns.items():
        rendered_names[ix] = index_template.format(
            target=target,
            column_names=column_names,
            connector_keys=pipe.connector_keys,
            metric_key=pipe.metric_key,
            location_key=pipe.location_key,
            schema_str=schema_str,
        )

    ### NOTE: Skip any duplicate indices (the first key to claim a name wins).
    first_ix_by_name = {}
    for ix, index_name in rendered_names.items():
        first_ix_by_name.setdefault(index_name, ix)

    return {
        ix: truncate_item_name(index_name, flavor=self.flavor)
        for index_name, ix in first_ix_by_name.items()
    }

Return a dictionary mapping index keys to their names on the database.

Returns
  • A dictionary of index keys to index names.
def get_plugins_pipe(self) -> meerschaum.Pipe:
def get_plugins_pipe(self) -> mrsm.Pipe:
    """
    Return the internal metadata plugins pipe.

    The `user_id` dtype mirrors the dtype declared on the users pipe
    (falling back to `'int'`).
    """
    user_id_dtype = self.get_users_pipe().dtypes.get('user_id', 'int')

    plugin_columns = {
        'primary': 'plugin_id',
        'user_id': 'user_id',
    }
    plugin_dtypes = {
        'plugin_name': 'string',
        'user_id': user_id_dtype,
        'attributes': 'json',
        'version': 'string',
    }
    plugin_indices = {
        'unique': 'plugin_name',
    }

    return mrsm.Pipe(
        'mrsm', 'plugins',
        instance=self,
        temporary=True,
        static=True,
        null_indices=False,
        columns=plugin_columns,
        dtypes=plugin_dtypes,
        indices=plugin_indices,
    )

Return the internal metadata plugins pipe.

def register_plugin( self, plugin: meerschaum.Plugin, force: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 46def register_plugin(
 47    self,
 48    plugin: 'mrsm.core.Plugin',
 49    force: bool = False,
 50    debug: bool = False,
 51    **kw: Any
 52) -> SuccessTuple:
 53    """Register a new plugin to the plugins table."""
 54    from meerschaum.utils.packages import attempt_import
 55    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
 56    from meerschaum.utils.sql import json_flavors
 57    from meerschaum.connectors.sql.tables import get_tables
 58    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
 59
 60    old_id = self.get_plugin_id(plugin, debug=debug)
 61
 62    ### Check for version conflict. May be overridden with `--force`.
 63    if old_id is not None and not force:
 64        old_version = self.get_plugin_version(plugin, debug=debug)
 65        new_version = plugin.version
 66        if old_version is None:
 67            old_version = ''
 68        if new_version is None:
 69            new_version = ''
 70
 71        ### verify that the new version is greater than the old
 72        packaging_version = attempt_import('packaging.version')
 73        if (
 74            old_version and new_version
 75            and packaging_version.parse(old_version) >= packaging_version.parse(new_version)
 76        ):
 77            return False, (
 78                f"Version '{new_version}' of plugin '{plugin}' " +
 79                f"must be greater than existing version '{old_version}'."
 80            )
 81
 82    bind_variables = {
 83        'plugin_name': plugin.name,
 84        'version': plugin.version,
 85        'attributes': (
 86            json.dumps(plugin.attributes) if self.flavor not in json_flavors else plugin.attributes
 87        ),
 88        'user_id': plugin.user_id,
 89    }
 90
 91    if old_id is None:
 92        query = sqlalchemy.insert(plugins_tbl).values(**bind_variables)
 93    else:
 94        query = (
 95            sqlalchemy.update(plugins_tbl)
 96            .values(**bind_variables)
 97            .where(plugins_tbl.c.plugin_id == old_id)
 98        )
 99
100    result = self.exec(query, debug=debug)
101    if result is None:
102        return False, f"Failed to register plugin '{plugin}'."
103    return True, f"Successfully registered plugin '{plugin}'."

Register a new plugin to the plugins table.

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def delete_plugin(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Delete a plugin from the plugins table.

    Deleting a plugin which was never registered counts as a success.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to delete.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success and a message.
    """
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']

    registered_id = self.get_plugin_id(plugin, debug=debug)
    if registered_id is None:
        return True, f"Plugin '{plugin}' was not registered."

    delete_query = (
        sqlalchemy
        .delete(plugins_tbl)
        .where(plugins_tbl.c.plugin_id == registered_id)
    )
    ### A `None` result from `exec()` is treated as failure.
    if self.exec(delete_query, debug=debug) is None:
        return False, f"Failed to delete plugin '{plugin}'."
    return True, f"Successfully deleted plugin '{plugin}'."

Delete a plugin from the plugins table.

def get_plugin_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[int]:
def get_plugin_id(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[int]:
    """
    Return a plugin's ID.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to look up by name.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The plugin's ID as an `int`, or `None` if the lookup
    or the integer conversion raises an exception.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    id_query = sqlalchemy.select(plugins_tbl.c.plugin_id).where(
        plugins_tbl.c.plugin_name == plugin.name
    )

    ### Best-effort: any failure (including `int(None)` for a missing row) yields `None`.
    try:
        plugin_id = int(self.value(id_query, debug=debug))
    except Exception:
        plugin_id = None
    return plugin_id

Return a plugin's ID.

def get_plugin_version( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
def get_plugin_version(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[str]:
    """
    Return a plugin's version.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to look up by name.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    Whatever `self.value()` returns for the plugin's `version` column.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    version_query = (
        sqlalchemy
        .select(plugins_tbl.c.version)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )
    return self.value(version_query, debug=debug)

Return a plugin's version.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False, **kw: Any) -> List[str]:
def get_plugins(
    self,
    user_id: Optional[int] = None,
    search_term: Optional[str] = None,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Return a list of all registered plugins.

    Parameters
    ----------
    user_id: Optional[int], default None
        If specified, filter plugins by a specific `user_id`.

    search_term: Optional[str], default None
        If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins.

    Returns
    -------
    A list of plugin names.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    name_col = plugins_tbl.c.plugin_name
    query = sqlalchemy.select(name_col)
    if user_id is not None:
        query = query.where(plugins_tbl.c.user_id == user_id)
    if search_term is not None:
        query = query.where(name_col.like(search_term + '%'))

    ### DuckDB results are read as a DataFrame rather than fetched from a cursor.
    if self.flavor == 'duckdb':
        records = self.read(query).to_dict(orient='records')
        return [record['plugin_name'] for record in records]

    return [row[0] for row in self.execute(query).fetchall()]

Return a list of all registered plugins.

Parameters
  • user_id (Optional[int], default None): If specified, filter plugins by a specific user_id.
  • search_term (Optional[str], default None): If specified, add a WHERE plugin_name LIKE '{search_term}%' clause to filter the plugins.
Returns
  • A list of plugin names.
def get_plugin_user_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[int]:
def get_plugin_user_id(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[int]:
    """
    Return a plugin's user ID.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to look up by name.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The owner's user ID as an `int`, or `None` if the lookup
    or the integer conversion raises an exception.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    owner_query = sqlalchemy.select(plugins_tbl.c.user_id).where(
        plugins_tbl.c.plugin_name == plugin.name
    )

    ### Best-effort: any failure (including `int(None)` for a missing row) yields `None`.
    try:
        owner_id = int(self.value(owner_query, debug=debug))
    except Exception:
        owner_id = None
    return owner_id

Return a plugin's user ID.

def get_plugin_username( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
def get_plugin_username(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[str]:
    """
    Return the username of a plugin's owner.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin whose owner should be looked up (matched on `plugin.name`).

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    Whatever `self.value()` returns for the owner's `username` column.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    users = get_tables(mrsm_instance=self, debug=debug)['users']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    ### NOTE: Conditions must be combined through SQLAlchemy (chained `.where()` or `and_()`).
    ### Python's `and` evaluates the truthiness of the first expression and returns
    ### only one operand, which silently dropped the `plugin_name` filter.
    query = (
        sqlalchemy.select(users.c.username)
        .where(users.c.user_id == plugins_tbl.c.user_id)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )

    return self.value(query, debug=debug)

Return the username of a plugin's owner.

def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
def get_plugin_attributes(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Dict[str, Any]:
    """
    Return the attributes of a plugin.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to look up by name.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The stored attributes. A string value is decoded as JSON,
    and a missing value (`None`) becomes an empty dictionary.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    attributes_query = sqlalchemy.select(plugins_tbl.c.attributes).where(
        plugins_tbl.c.plugin_name == plugin.name
    )

    attributes = self.value(attributes_query, debug=debug)
    if attributes is None:
        return {}
    if isinstance(attributes, str):
        return json.loads(attributes)
    return attributes

Return the attributes of a plugin.

def get_users_pipe(self) -> meerschaum.Pipe:
def get_users_pipe(self) -> mrsm.Pipe:
    """
    Return the internal metadata pipe for users management.

    The pipe is built once and memoized on the connector as `_users_pipe`.
    """
    if '_users_pipe' in self.__dict__:
        return self._users_pipe

    ### NOTE(review): unlike `get_plugins_pipe()`, no `instance` is passed here.
    ### Confirm this is intended.
    pipe_kwargs = {
        'temporary': True,
        'cache': True,
        'cache_connector_keys': self.__dict__.get('_cache_connector', None),
        'static': True,
        'null_indices': False,
        'enforce': False,
        'autoincrement': True,
        'columns': {
            'primary': 'user_id',
        },
        'dtypes': {
            'user_id': 'int',
            'username': 'string',
            'attributes': 'json',
            'user_type': 'string',
        },
        'indices': {
            'unique': 'username',
        },
    }
    self._users_pipe = mrsm.Pipe('mrsm', 'users', **pipe_kwargs)
    return self._users_pipe

Return the internal metadata pipe for users management.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def register_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Register a new user.

    Parameters
    ----------
    user: mrsm.core.User
        The user to insert into the users table.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success and a message.
    Registration fails if the username is invalid (the tuple from `valid_username()`
    is returned as-is) or if a user with the same username already exists.
    """
    import json
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.sql import json_flavors
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    valid_tuple = valid_username(user.username)
    if not valid_tuple[0]:
        return valid_tuple

    ### This is the only existence check needed:
    ### past this point the user is known to be unregistered.
    if self.get_user_id(user, debug=debug) is not None:
        return False, f"User '{user}' already exists."

    ### ensure users table exists
    from meerschaum.connectors.sql.tables import get_tables
    tables = get_tables(mrsm_instance=self, debug=debug)

    bind_variables = {
        'username': user.username,
        'email': user.email,
        'password_hash': user.password_hash,
        'user_type': user.type,
        ### Flavors without native JSON support store the attributes as a JSON string.
        'attributes': (
            json.dumps(user.attributes)
            if self.flavor not in json_flavors
            else user.attributes
        ),
    }
    query = sqlalchemy.insert(tables['users']).values(**bind_variables)

    ### A `None` result from `exec()` is treated as failure.
    result = self.exec(query, debug=debug)
    if result is None:
        return False, f"Failed to register user '{user}'."
    return True, f"Successfully registered user '{user}'."

Register a new user.

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[int]:
def get_user_id(
    self,
    user: 'mrsm.core.User',
    debug: bool = False
) -> Optional[int]:
    """
    If a user is registered, return the `user_id`.

    Parameters
    ----------
    user: mrsm.core.User
        The user to look up by username.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The user's ID as an `int`, or `None` if the query yields no value.
    """
    ### ensure users table exists
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']

    id_query = sqlalchemy.select(users_tbl.c.user_id).where(
        users_tbl.c.username == user.username
    )

    found_id = self.value(id_query, debug=debug)
    return None if found_id is None else int(found_id)

If a user is registered, return the user_id.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
def get_users(
    self,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Get the registered usernames.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list built from the `username` column of the users table.
    """
    ### ensure users table exists
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    usernames_query = sqlalchemy.select(users_tbl.c.username)
    users_df = self.read(usernames_query, debug=debug)
    return list(users_df['username'])

Get the registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def edit_user(
    self,
    user: 'mrsm.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Update an existing user's metadata.

    Only the fields which are set on `user` (non-empty password, email and type,
    non-empty attributes) are written; the username is always written.
    The resolved ID is also stored back onto `user.user_id`.

    Parameters
    ----------
    user: mrsm.core.User
        The user carrying the new values.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success and a message.
    """
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.sql import json_flavors
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']

    ### Prefer the ID already on the user; otherwise look it up by username.
    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)
    if user_id is None:
        return False, (
            f"User '{user.username}' does not exist. "
            f"Register user '{user.username}' before editing."
        )
    user.user_id = user_id

    import json
    valid_tuple = valid_username(user.username)
    if not valid_tuple[0]:
        return valid_tuple

    bind_variables = {
        'user_id' : user_id,
        'username' : user.username,
    }
    if user.password != '':
        bind_variables['password_hash'] = user.password_hash
    if user.email != '':
        bind_variables['email'] = user.email
    if user.attributes is not None and user.attributes != {}:
        ### Flavors without native JSON support store the attributes as a JSON string.
        if self.flavor in json_flavors:
            bind_variables['attributes'] = user.attributes
        else:
            bind_variables['attributes'] = json.dumps(user.attributes)
    if user.type != '':
        bind_variables['user_type'] = user.type

    update_query = (
        sqlalchemy
        .update(users_tbl)
        .values(**bind_variables)
        .where(users_tbl.c.user_id == user_id)
    )

    ### A `None` result from `exec()` is treated as failure.
    if self.exec(update_query, debug=debug) is None:
        return False, f"Failed to edit user '{user}'."
    return True, f"Successfully edited user '{user}'."

Update an existing user's metadata.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
def delete_user(
    self,
    user: 'mrsm.core.User',
    debug: bool = False
) -> SuccessTuple:
    """
    Delete a user's record from the users table.

    After the user's row is removed, the plugins owned by that user are deleted as well.

    Parameters
    ----------
    user: mrsm.core.User
        The user to delete.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success and a message.
    """
    ### ensure users table exists
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    plugins = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    ### Prefer the ID already on the user; otherwise look it up by username.
    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)
    if user_id is None:
        return False, f"User '{user.username}' is not registered and cannot be deleted."

    ### A `None` result from `exec()` is treated as failure.
    delete_user_query = sqlalchemy.delete(users_tbl).where(users_tbl.c.user_id == user_id)
    if self.exec(delete_user_query, debug=debug) is None:
        return False, f"Failed to delete user '{user}'."

    delete_plugins_query = sqlalchemy.delete(plugins).where(plugins.c.user_id == user_id)
    if self.exec(delete_plugins_query, debug=debug) is None:
        return False, f"Failed to delete plugins of user '{user}'."

    return True, f"Successfully deleted user '{user}'"

Delete a user's record from the users table.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
def get_user_password_hash(
    self,
    user: 'mrsm.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """
    Return the password hash for a user.
    **NOTE**: This may be dangerous and is only allowed if the security settings explicitly allow it.

    Parameters
    ----------
    user: mrsm.core.User
        The user whose hash should be read. If `user.user_id` is unset,
        the ID is looked up with `get_user_id()`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    Whatever `self.value()` returns for the `password_hash` column,
    or `None` if the user's ID cannot be determined.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    if user.user_id is None:
        if debug:
            dprint("Fetching user_id...")
        user_id = self.get_user_id(user, debug=debug)
    else:
        user_id = user.user_id
        if debug:
            dprint(f"Already given user_id: {user_id}")

    if user_id is None:
        return None

    hash_query = (
        sqlalchemy
        .select(users_tbl.c.password_hash)
        .where(users_tbl.c.user_id == user_id)
    )
    return self.value(hash_query, debug=debug)

Return the password hash for a user. NOTE: This may be dangerous and is only allowed if the security settings explicitly allow it.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
def get_user_type(
    self,
    user: 'mrsm.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """
    Return the user's type.

    Parameters
    ----------
    user: mrsm.core.User
        The user to look up. If `user.user_id` is unset,
        the ID is looked up with `get_user_id()`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    Whatever `self.value()` returns for the `user_type` column,
    or `None` if the user's ID cannot be determined.
    """
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)

    user_id = user.user_id
    if user_id is None:
        user_id = self.get_user_id(user, debug=debug)
    if user_id is None:
        return None

    type_query = (
        sqlalchemy
        .select(users_tbl.c.user_type)
        .where(users_tbl.c.user_id == user_id)
    )
    return self.value(type_query, debug=debug)

Return the user's type.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[Dict[str, Any]]:
def get_user_attributes(
    self,
    user: 'mrsm.core.User',
    debug: bool = False
) -> Union[Dict[str, Any], None]:
    """
    Return the user's attributes.

    Parameters
    ----------
    user: mrsm.core.User
        The user to look up. If `user.user_id` is unset,
        the ID is looked up with `get_user_id()`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The user's attributes as a dictionary, or `None` if the user's ID cannot be
    determined or no attributes are stored. If the stored value cannot be parsed
    into a dictionary, a warning is emitted and the raw value is returned.
    """
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    ### An unknown user has no row to read: skip the `user_id IS NULL` query,
    ### matching `get_user_type()` and `get_user_password_hash()`.
    if user_id is None:
        return None

    import json
    ### ensure users table exists
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']

    query = (
        sqlalchemy.select(users_tbl.c.attributes)
        .where(users_tbl.c.user_id == user_id)
    )

    result = self.value(query, debug=debug)
    if result is None or isinstance(result, dict):
        return result

    ### First try to coerce a mapping-like value directly...
    try:
        return dict(result)
    except Exception:
        pass

    ### ...then fall back to decoding the value as JSON.
    try:
        return json.loads(result)
    except Exception:
        warn(f"Received unexpected type for attributes: {result}")
    return result

Return the user's attributes.

@classmethod
def from_uri( cls, uri: str, label: Optional[str] = None, as_dict: bool = False) -> Union[SQLConnector, Dict[str, Union[str, int]]]:
@classmethod
def from_uri(
    cls,
    uri: str,
    label: Optional[str] = None,
    as_dict: bool = False,
) -> Union[
    'meerschaum.connectors.SQLConnector',
    Dict[str, Union[str, int]],
]:
    """
    Create a new SQLConnector from a URI string.

    Parameters
    ----------
    uri: str
        The URI connection string.

    label: Optional[str], default None
        If provided, use this as the connector label.
        Otherwise derive one: `memory_<flavor>` or the lower-cased file name for
        file-based flavors, else the lower-cased `<username>@<host>/<database>`.

    as_dict: bool, default False
        If `True`, return a dictionary of the keyword arguments
        necessary to create a new `SQLConnector`, otherwise create a new object.

    Returns
    -------
    A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`).
    """
    params = cls.parse_uri(uri)
    params['uri'] = uri

    flavor = params.get('flavor', None)
    if not flavor or flavor not in cls.flavor_configs:
        error(f"Invalid flavor '{flavor}' detected from the provided URI.")

    if 'database' not in params:
        error("Unable to determine the database from the provided URI.")

    if flavor in ('sqlite', 'duckdb', 'geopackage'):
        ### File-based flavors: label by the database file (or mark as in-memory).
        in_memory = params['database'] == ':memory:'
        if label:
            params['label'] = label
        elif in_memory:
            params['label'] = f'memory_{flavor}'
        else:
            file_name = params['database'].split(os.path.sep)[-1]
            params['label'] = file_name.lower()
    elif label:
        params['label'] = label
    else:
        user_part = (params['username'] + '@') if 'username' in params else ''
        host_part = params.get('host', '')
        slash_part = '/' if 'host' in params else ''
        database_part = params.get('database', '')
        params['label'] = (user_part + host_part + slash_part + database_part).lower()

    if as_dict:
        return params
    return cls(**params)

Create a new SQLConnector from a URI string.

Parameters
  • uri (str): The URI connection string.
  • label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
  • as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new SQLConnector, otherwise create a new object.
Returns
  • A new SQLConnector object or a dictionary of attributes (if as_dict is True).
@staticmethod
def parse_uri(uri: str) -> Dict[str, Any]:
 73@staticmethod
 74def parse_uri(uri: str) -> Dict[str, Any]:
 75    """
 76    Parse a URI string into a dictionary of parameters.
 77
 78    Parameters
 79    ----------
 80    uri: str
 81        The database connection URI.
 82
 83    Returns
 84    -------
 85    A dictionary of attributes.
 86
 87    Examples
 88    --------
 89    >>> parse_uri('sqlite:////home/foo/bar.db')
 90    {'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
 91    >>> parse_uri(
 92    ...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
 93    ...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
 94    ... )
 95    {'host': 'localhost', 'database': 'master', 'username': 'sa',
 96    'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
 97    'driver': 'ODBC Driver 17 for SQL Server'}
 98    >>> 
 99    """
100    from urllib.parse import parse_qs, urlparse
101    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
102    parser = sqlalchemy.engine.url.make_url
103    params = parser(uri).translate_connect_args()
104    params['flavor'] = uri.split(':')[0].split('+')[0]
105    if params['flavor'] == 'postgres':
106        params['flavor'] = 'postgresql'
107    if '?' in uri:
108        parsed_uri = urlparse(uri)
109        for key, value in parse_qs(parsed_uri.query).items():
110            params.update({key: value[0]})
111
112        if '--search_path' in params.get('options', ''):
113            params.update({'schema': params['options'].replace('--search_path=', '', 1)})
114    return params

Parse a URI string into a dictionary of parameters.

Parameters
  • uri (str): The database connection URI.
Returns
  • A dictionary of attributes.
Examples
>>> parse_uri('sqlite:////home/foo/bar.db')
{'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
>>> parse_uri(
...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
... )
{'host': 'localhost', 'database': 'master', 'username': 'sa',
'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
'driver': 'ODBC Driver 17 for SQL Server'}
>>>
class APIConnector(meerschaum.connectors.InstanceConnector):
 22class APIConnector(InstanceConnector):
 23    """
 24    Connect to a Meerschaum API instance.
 25    """
 26
 27    IS_THREAD_SAFE: bool = False
 28    OPTIONAL_ATTRIBUTES: List[str] = ['port', 'client_secret', 'client_id', 'api_key']
 29
 30    from ._request import (
 31        make_request,
 32        get,
 33        post,
 34        put,
 35        patch,
 36        delete,
 37        wget,
 38    )
 39    from ._actions import (
 40        get_actions,
 41        do_action,
 42        do_action_async,
 43        do_action_legacy,
 44    )
 45    from ._misc import get_mrsm_version, get_chaining_status
 46    from ._pipes import (
 47        get_pipe_instance_keys,
 48        register_pipe,
 49        fetch_pipes_keys,
 50        edit_pipe,
 51        sync_pipe,
 52        delete_pipe,
 53        delete_pipe_cache,
 54        get_pipe_data,
 55        get_pipe_id,
 56        get_pipe_attributes,
 57        get_sync_time,
 58        pipe_exists,
 59        create_metadata,
 60        get_pipe_rowcount,
 61        drop_pipe,
 62        clear_pipe,
 63        get_pipe_columns_types,
 64        get_pipe_columns_indices,
 65        get_pipe_docs,
 66        get_pipe_size,
 67        compress_pipe,
 68        decompress_pipe,
 69        vacuum_pipe,
 70        analyze_pipe,
 71        partition_pipe,
 72    )
 73    from ._fetch import fetch
 74    from ._plugins import (
 75        register_plugin,
 76        install_plugin,
 77        delete_plugin,
 78        get_plugins,
 79        get_plugin_attributes,
 80    )
 81    from ._login import login, test_connection
 82    from ._users import (
 83        register_user,
 84        get_user_id,
 85        get_users,
 86        edit_user,
 87        delete_user,
 88        get_user_password_hash,
 89        get_user_type,
 90        get_user_attributes,
 91    )
 92    from ._tokens import (
 93        register_token,
 94        get_token_model,
 95        get_tokens,
 96        edit_token,
 97        invalidate_token,
 98        get_token_scopes,
 99        token_exists,
100        delete_token,
101    )
102    from ._uri import from_uri
103    from ._jobs import (
104        get_jobs,
105        get_job,
106        get_job_metadata,
107        get_job_properties,
108        get_job_exists,
109        delete_job,
110        start_job,
111        create_job,
112        stop_job,
113        pause_job,
114        get_logs,
115        get_job_stop_time,
116        monitor_logs,
117        monitor_logs_async,
118        get_job_is_blocking_on_stdin,
119        get_job_began,
120        get_job_ended,
121        get_job_paused,
122        get_job_status,
123    )
124
125    def __init__(
126        self,
127        label: Optional[str] = None,
128        wait: bool = False,
129        debug: bool = False,
130        **kw
131    ):
132        if 'uri' in kw:
133            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
134            label = label or from_uri_params.get('label', None)
135            _ = from_uri_params.pop('label', None)
136            kw.update(from_uri_params)
137
138        super().__init__('api', label=label, **kw)
139        if 'protocol' not in self.__dict__:
140            self.protocol = (
141                'https' if self.__dict__.get('uri', '').startswith('https')
142                else 'http'
143            )
144
145        if 'uri' not in self.__dict__:
146            self.verify_attributes(required_attributes)
147        else:
148            from meerschaum.connectors.sql import SQLConnector
149            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
150            if 'host' not in conn_attrs:
151                raise Exception(f"Invalid URI for '{self}'.")
152            self.__dict__.update(conn_attrs)
153
154        self.url = (
155            self.protocol + '://' +
156            self.host
157            + (
158                (':' + str(self.port))
159                if self.__dict__.get('port', None)
160                else ''
161            )
162        )
163        self._token = None
164        self._expires = None
165        self._session = None
166        self._instance_keys = self.__dict__.get('instance_keys', None)
167
168
169    @property
170    def URI(self) -> str:
171        """
172        Return the fully qualified URI.
173        """
174        import urllib.parse
175        username = self.__dict__.get('username', None)
176        password = self.__dict__.get('password', None)
177        client_id = self.__dict__.get('client_id', None)
178        client_secret = self.__dict__.get('client_secret', None)
179        api_key = self.__dict__.get('api_key', None)
180        creds = (username + ':' + password + '@') if username and password else ''
181        params = {}
182        params_str = ('?' + urllib.parse.urlencode(params)) if params else ''
183        return (
184            self.protocol
185            + '://'
186            + creds
187            + self.host
188            + (
189                (':' + str(self.port))
190                if self.__dict__.get('port', None)
191                else ''
192            )
193            + params_str
194        )
195
196    @property
197    def session(self):
198        if self._session is None:
199            _ = attempt_import('certifi', lazy=False)
200            requests = attempt_import('requests', lazy=False)
201            if requests:
202                self._session = requests.Session()
203            if self._session is None:
204                error("Failed to import requests. Is requests installed?")
205        return self._session
206
207    @property
208    def token(self):
209        if self.login_scheme == 'api_key':
210            return self.api_key
211
212        expired = (
213            True if self._expires is None else (
214                (
215                    self._expires
216                    <
217                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
218                )
219            )
220        )
221
222        if self._token is None or expired:
223            success, msg = self.login()
224            if not success and not self.__dict__.get('_emitted_warning'):
225                warn(msg, stack=False)
226                self._emitted_warning = True
227        return self._token
228
229    @property
230    def instance_keys(self) -> Union[str, None]:
231        """
232        Return the instance keys to be sent alongside pipe requests.
233        """
234        return self._instance_keys
235
236    @property
237    def login_scheme(self) -> str:
238        """
239        Return the login scheme to use based on the configured credentials.
240        """
241        if 'username' in self.__dict__:
242            return 'password'
243        if 'client_id' in self.__dict__:
244            return 'client_credentials'
245        elif 'api_key' in self.__dict__:
246            return 'api_key'
247
248        return 'password'

Connect to a Meerschaum API instance.

APIConnector( label: Optional[str] = None, wait: bool = False, debug: bool = False, **kw)
125    def __init__(
126        self,
127        label: Optional[str] = None,
128        wait: bool = False,
129        debug: bool = False,
130        **kw
131    ):
132        if 'uri' in kw:
133            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
134            label = label or from_uri_params.get('label', None)
135            _ = from_uri_params.pop('label', None)
136            kw.update(from_uri_params)
137
138        super().__init__('api', label=label, **kw)
139        if 'protocol' not in self.__dict__:
140            self.protocol = (
141                'https' if self.__dict__.get('uri', '').startswith('https')
142                else 'http'
143            )
144
145        if 'uri' not in self.__dict__:
146            self.verify_attributes(required_attributes)
147        else:
148            from meerschaum.connectors.sql import SQLConnector
149            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
150            if 'host' not in conn_attrs:
151                raise Exception(f"Invalid URI for '{self}'.")
152            self.__dict__.update(conn_attrs)
153
154        self.url = (
155            self.protocol + '://' +
156            self.host
157            + (
158                (':' + str(self.port))
159                if self.__dict__.get('port', None)
160                else ''
161            )
162        )
163        self._token = None
164        self._expires = None
165        self._session = None
166        self._instance_keys = self.__dict__.get('instance_keys', None)

Set the given keyword arguments as attributes.

Parameters
  • type (str): The type of the connector (e.g. sql, api, plugin).
  • label (str): The label for the connector.
Examples

Run mrsm edit config to edit connectors in the YAML file:

meerschaum:
    connections:
        {type}:
            {label}:
                ### attributes go here
IS_THREAD_SAFE: bool = False
OPTIONAL_ATTRIBUTES: List[str] = ['port', 'client_secret', 'client_id', 'api_key']
url
URI: str
169    @property
170    def URI(self) -> str:
171        """
172        Return the fully qualified URI.
173        """
174        import urllib.parse
175        username = self.__dict__.get('username', None)
176        password = self.__dict__.get('password', None)
177        client_id = self.__dict__.get('client_id', None)
178        client_secret = self.__dict__.get('client_secret', None)
179        api_key = self.__dict__.get('api_key', None)
180        creds = (username + ':' + password + '@') if username and password else ''
181        params = {}
182        params_str = ('?' + urllib.parse.urlencode(params)) if params else ''
183        return (
184            self.protocol
185            + '://'
186            + creds
187            + self.host
188            + (
189                (':' + str(self.port))
190                if self.__dict__.get('port', None)
191                else ''
192            )
193            + params_str
194        )

Return the fully qualified URI.

session
196    @property
197    def session(self):
198        if self._session is None:
199            _ = attempt_import('certifi', lazy=False)
200            requests = attempt_import('requests', lazy=False)
201            if requests:
202                self._session = requests.Session()
203            if self._session is None:
204                error("Failed to import requests. Is requests installed?")
205        return self._session
token
207    @property
208    def token(self):
209        if self.login_scheme == 'api_key':
210            return self.api_key
211
212        expired = (
213            True if self._expires is None else (
214                (
215                    self._expires
216                    <
217                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
218                )
219            )
220        )
221
222        if self._token is None or expired:
223            success, msg = self.login()
224            if not success and not self.__dict__.get('_emitted_warning'):
225                warn(msg, stack=False)
226                self._emitted_warning = True
227        return self._token
instance_keys: Optional[str]
229    @property
230    def instance_keys(self) -> Union[str, None]:
231        """
232        Return the instance keys to be sent alongside pipe requests.
233        """
234        return self._instance_keys

Return the instance keys to be sent alongside pipe requests.

login_scheme: str
236    @property
237    def login_scheme(self) -> str:
238        """
239        Return the login scheme to use based on the configured credentials.
240        """
241        if 'username' in self.__dict__:
242            return 'password'
243        if 'client_id' in self.__dict__:
244            return 'client_credentials'
245        elif 'api_key' in self.__dict__:
246            return 'api_key'
247
248        return 'password'

Return the login scheme to use based on the configured credentials.

def make_request( self, method: str, r_url: str, headers: Optional[Dict[str, Any]] = None, use_token: bool = True, debug: bool = False, **kwargs: Any) -> requests.models.Response:
 28def make_request(
 29    self,
 30    method: str,
 31    r_url: str,
 32    headers: Optional[Dict[str, Any]] = None,
 33    use_token: bool = True,
 34    debug: bool = False,
 35    **kwargs: Any
 36) -> 'requests.Response':
 37    """
 38    Make a request to this APIConnector's endpoint using the in-memory session.
 39
 40    Parameters
 41    ----------
 42    method: str
 43        The kind of request to make.
 44        Accepted values:
 45        - `'GET'`
 46        - `'OPTIONS'`
 47        - `'HEAD'`
 48        - `'POST'`
 49        - `'PUT'`
 50        - `'PATCH'`
 51        - `'DELETE'`
 52
 53    r_url: str
 54        The relative URL for the endpoint (e.g. `'/pipes'`).
 55
 56    headers: Optional[Dict[str, Any]], default None
 57        The headers to use for the request.
 58        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
 59
 60    use_token: bool, default True
 61        If `True`, add the authorization token to the headers.
 62
 63    debug: bool, default False
 64        Verbosity toggle.
 65
 66    kwargs: Any
 67        All other keyword arguments are passed to `requests.request`.
 68
 69    Returns
 70    -------
 71    A `requests.Reponse` object.
 72    """
 73    if method.upper() not in METHODS:
 74        raise ValueError(f"Method '{method}' is not supported.")
 75
 76    verify = self.__dict__.get('verify', None)
 77    if 'verify' not in kwargs and isinstance(verify, bool):
 78        kwargs['verify'] = verify
 79
 80    headers = (
 81        copy.deepcopy(headers)
 82        if isinstance(headers, dict)
 83        else {}
 84    )
 85
 86    if use_token:
 87        headers.update({'Authorization': f'Bearer {self.token}'})
 88
 89    if 'timeout' not in kwargs:
 90        kwargs['timeout'] = STATIC_CONFIG['api']['default_timeout']
 91
 92    request_url = urllib.parse.urljoin(self.url, r_url)
 93    if debug:
 94        dprint(f"[{self}] Sending a '{method.upper()}' request to {request_url}")
 95
 96    return self.session.request(
 97        method.upper(),
 98        request_url,
 99        headers=headers,
100        **kwargs
101    )

Make a request to this APIConnector's endpoint using the in-memory session.

Parameters
  • method (str): The kind of request to make. Accepted values:
    • 'GET'
    • 'OPTIONS'
    • 'HEAD'
    • 'POST'
    • 'PUT'
    • 'PATCH'
    • 'DELETE'
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def get(self, r_url: str, **kwargs: Any) -> requests.models.Response:
def get(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Send a `GET` request through `make_request`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded unchanged to `make_request`
        (e.g. `headers`, `use_token`, `debug`, and any request options).

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('GET', r_url, **kwargs)
    return response

Wrapper for requests.get.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def post(self, r_url: str, **kwargs: Any) -> requests.models.Response:
def post(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Send a `POST` request through `make_request`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded unchanged to `make_request`
        (e.g. `headers`, `use_token`, `debug`, and any request options).

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('POST', r_url, **kwargs)
    return response

Wrapper for requests.post.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def put(self, r_url: str, **kwargs: Any) -> requests.models.Response:
def put(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Send a `PUT` request through `make_request`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded unchanged to `make_request`
        (e.g. `headers`, `use_token`, `debug`, and any request options).

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('PUT', r_url, **kwargs)
    return response

Wrapper for requests.put.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def patch(self, r_url: str, **kwargs: Any) -> requests.models.Response:
def patch(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Send a `PATCH` request through `make_request`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded unchanged to `make_request`
        (e.g. `headers`, `use_token`, `debug`, and any request options).

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('PATCH', r_url, **kwargs)
    return response

Wrapper for requests.patch.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def delete(self, r_url: str, **kwargs: Any) -> requests.models.Response:
def delete(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Send a `DELETE` request through `make_request`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded unchanged to `make_request`
        (e.g. `headers`, `use_token`, `debug`, and any request options).

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('DELETE', r_url, **kwargs)
    return response

Wrapper for requests.delete.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def wget( self, r_url: str, dest: Union[str, pathlib.Path, NoneType] = None, headers: Optional[Dict[str, Any]] = None, use_token: bool = True, debug: bool = False, **kw: Any) -> pathlib.Path:
def wget(
    self,
    r_url: str,
    dest: Optional[Union[str, pathlib.Path]] = None,
    headers: Optional[Dict[str, Any]] = None,
    use_token: bool = True,
    debug: bool = False,
    **kw: Any
) -> pathlib.Path:
    """
    Mimic wget with requests.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint, joined onto `self.url`.

    dest: Optional[Union[str, pathlib.Path]], default None
        The download destination, passed through to `meerschaum.utils.misc.wget`.

    headers: Optional[Dict[str, Any]], default None
        Headers for the request. A copy is made, so the caller's
        dictionary is never modified.

    use_token: bool, default True
        If `True`, add an `Authorization: Bearer ...` header built from `self.token`.

    debug: bool, default False
        Verbosity toggle.

    kw: Any
        All other keyword arguments are passed to `meerschaum.utils.misc.wget`.

    Returns
    -------
    The value returned by `meerschaum.utils.misc.wget` (annotated as a `pathlib.Path`).
    """
    from meerschaum.utils.misc import wget
    # Work on a copy so the Authorization header never leaks into the caller's dict.
    headers = dict(headers) if headers is not None else {}
    if use_token:
        headers['Authorization'] = f'Bearer {self.token}'
    request_url = urllib.parse.urljoin(self.url, r_url)
    if debug:
        dprint(
            f"[{self}] Downloading {request_url}"
            + (f' to {dest}' if dest is not None else '')
            + "..."
        )
    return wget(request_url, dest=dest, headers=headers, **kw)

Mimic wget with requests.

def get_actions(self):
def get_actions(self):
    """
    Request the available actions from the API instance.

    Returns the result of a `GET` request to `ACTIONS_ENDPOINT`.
    """
    response = self.get(ACTIONS_ENDPOINT)
    return response

Get available actions from the API instance.

def do_action(self, sysargs: List[str]) -> Tuple[bool, str]:
def do_action(self, sysargs: List[str]) -> SuccessTuple:
    """
    Execute a Meerschaum action remotely.

    Synchronous wrapper which runs `do_action_async(sysargs)`
    to completion with `asyncio.run()` and returns its result.
    """
    action_coroutine = self.do_action_async(sysargs)
    return asyncio.run(action_coroutine)

Execute a Meerschaum action remotely.

async def do_action_async( self, sysargs: List[str], callback_function: Callable[[str], NoneType] = functools.partial(<built-in function print>, end='')) -> Tuple[bool, str]:
async def do_action_async(
    self,
    sysargs: List[str],
    callback_function: Callable[[str], None] = partial(print, end=''),
) -> SuccessTuple:
    """
    Execute an action as a temporary remote job.

    Parameters
    ----------
    sysargs: List[str]
        The action's arguments. They are first passed through
        `remove_api_executor_keys`.

    callback_function: Callable[[str], None], default partial(print, end='')
        Passed to the job's `monitor_logs_async()` as `callback_function`.

    Returns
    -------
    The job's `result` tuple, or the failed `start()` tuple
    if the job could not be started.
    """
    from meerschaum._internal.arguments import remove_api_executor_keys
    from meerschaum.utils.misc import generate_password

    cleaned_sysargs = remove_api_executor_keys(sysargs)
    temp_job_name = TEMP_PREFIX + generate_password(12)
    temp_job = mrsm.Job(temp_job_name, cleaned_sysargs, executor_keys=str(self))

    started, start_message = temp_job.start()
    if not started:
        return started, start_message

    await temp_job.monitor_logs_async(
        callback_function=callback_function,
        stop_on_exit=True,
        strip_timestamps=True,
    )

    # Read the result before deleting the temporary job.
    success, msg = temp_job.result
    temp_job.delete()
    return success, msg

Execute an action as a temporary remote job.

def do_action_legacy( self, action: Optional[List[str]] = None, sysargs: Optional[List[str]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
 66def do_action_legacy(
 67    self,
 68    action: Optional[List[str]] = None,
 69    sysargs: Optional[List[str]] = None,
 70    debug: bool = False,
 71    **kw
 72) -> SuccessTuple:
 73    """
 74    NOTE: This method is deprecated.
 75    Please use `do_action()` or `do_action_async()`.
 76
 77    Execute a Meerschaum action remotely.
 78
 79    If `sysargs` are provided, parse those instead.
 80    Otherwise infer everything from keyword arguments.
 81
 82    Examples
 83    --------
 84    >>> conn = mrsm.get_connector('api:main')
 85    >>> conn.do_action(['show', 'pipes'])
 86    (True, "Success")
 87    >>> conn.do_action(['show', 'arguments'], name='test')
 88    (True, "Success")
 89    """
 90    import sys, json
 91    from meerschaum.utils.debug import dprint
 92    from meerschaum._internal.static import STATIC_CONFIG
 93    from meerschaum.utils.misc import json_serialize_datetime
 94    if action is None:
 95        action = []
 96
 97    if sysargs is not None and action and action[0] == '':
 98        from meerschaum._internal.arguments import parse_arguments
 99        if debug:
100            dprint(f"Parsing sysargs:\n{sysargs}")
101        json_dict = parse_arguments(sysargs)
102    else:
103        json_dict = kw
104        json_dict['action'] = action
105        if 'noask' not in kw:
106            json_dict['noask'] = True
107        if 'yes' not in kw:
108            json_dict['yes'] = True
109        if debug:
110            json_dict['debug'] = debug
111
112    root_action = json_dict['action'][0]
113    del json_dict['action'][0]
114    r_url = f"{STATIC_CONFIG['api']['endpoints']['actions']}/{root_action}"
115    
116    if debug:
117        from meerschaum.utils.formatting import pprint
118        dprint(f"Sending data to '{self.url + r_url}':")
119        pprint(json_dict, stream=sys.stderr)
120
121    response = self.post(
122        r_url,
123        data = json.dumps(json_dict, default=json_serialize_datetime),
124        debug = debug,
125    )
126    try:
127        response_list = json.loads(response.text)
128        if isinstance(response_list, dict) and 'detail' in response_list:
129            return False, response_list['detail']
130    except Exception as e:
131        print(f"Invalid response: {response}")
132        print(e)
133        return False, response.text
134    if debug:
135        dprint(response)
136    try:
137        return response_list[0], response_list[1]
138    except Exception as e:
139        return False, f"Failed to parse result from action '{root_action}'"

NOTE: This method is deprecated. Please use do_action() or do_action_async().

Execute a Meerschaum action remotely.

If sysargs are provided, parse those instead. Otherwise infer everything from keyword arguments.

Examples
>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action_legacy(['show', 'pipes'])
(True, "Success")
>>> conn.do_action_legacy(['show', 'arguments'], name='test')
(True, "Success")
def get_mrsm_version(self, **kw) -> Optional[str]:
def get_mrsm_version(self, **kw) -> Optional[str]:
    """
    Return the Meerschaum version of the API instance.

    The version endpoint is queried without a token. `None` is returned
    if the request or JSON decoding fails, or if the payload is an
    error dictionary containing `'detail'`.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    try:
        version_url = STATIC_CONFIG['api']['endpoints']['version'] + '/mrsm'
        response = self.get(version_url, use_token=False, **kw)
        payload = response.json()
    except Exception:
        return None

    is_error_payload = isinstance(payload, dict) and 'detail' in payload
    return None if is_error_payload else payload

Return the Meerschaum version of the API instance.

def get_chaining_status(self, **kw) -> Optional[bool]:
def get_chaining_status(self, **kw) -> Optional[bool]:
    """
    Fetch the chaining status of the API instance.

    Returns the decoded JSON of the chaining endpoint, or `None` if the
    request raises or the response is falsy.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    try:
        chaining_url = STATIC_CONFIG['api']['endpoints']['chaining']
        response = self.get(chaining_url, use_token=True, **kw)
        succeeded = bool(response)
    except Exception:
        return None

    if not succeeded:
        return None
    return response.json()

Fetch the chaining status of the API instance.

def get_pipe_instance_keys(self, pipe: meerschaum.Pipe) -> Optional[str]:
def get_pipe_instance_keys(self, pipe: mrsm.Pipe) -> Union[str, None]:
    """
    Return the pipe's own `instance_keys` parameter when present;
    otherwise use this connector's default `instance_keys`.
    """
    pipe_parameters = pipe.parameters
    fallback_keys = self.instance_keys
    return pipe_parameters.get('instance_keys', fallback_keys)

Return the configured instance keys for a pipe if set, else fall back to the default instance_keys for this APIConnector.

def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> SuccessTuple:
    """
    Submit a POST to the API to register a new Pipe object.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to register. Its `parameters` are sent as the JSON body.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A tuple of `(success_bool, message)`.
    A falsy response yields `(False, response.text)`.
    """
    from meerschaum.utils.debug import dprint
    r_url = pipe_r_url(pipe)
    response = self.post(
        r_url + '/register',
        json=pipe._attributes.get('parameters', {}),
        params={'instance_keys': self.get_pipe_instance_keys(pipe)},
        debug=debug,
    )
    if debug:
        dprint(response.text)

    if not response:
        return False, response.text

    # The response is known to be truthy from here on; parse the body only once.
    response_data = response.json()
    if isinstance(response_data, list):
        return response_data[0], response_data[1]
    if isinstance(response_data, dict) and 'detail' in response_data:
        return True, response_data['detail']
    return True, response.text

Submit a POST to the API to register a new Pipe object. Returns a tuple of (success_bool, response_dict).

def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False) -> Union[Dict[Union[int, str], Tuple[str, str, Optional[str], Dict[str, Any]]], List[Tuple[str, str, Optional[str]]]]:
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False
) -> Union[
    Dict[Union[int, str], Tuple[str, str, Union[str, None], Dict[str, Any]]],
    List[Tuple[str, str, Union[str, None]]],
]:
    """
    Ask the API for the keys of its registered pipes.

    Parameters
    ----------
    connector_keys: Optional[List[str]], default None
        Connector keys to filter by (`None` is sent as an empty list).

    metric_keys: Optional[List[str]], default None
        Metric keys to filter by (`None` is sent as an empty list).

    location_keys: Optional[List[str]], default None
        Location keys to filter by (`None` is sent as an empty list).

    tags: Optional[List[str]], default None
        Tags to filter by (`None` is sent as an empty list).

    params: Optional[Dict[str, Any]], default None
        A parameters dictionary for filtering against the `pipes` table
        (e.g. `{'connector_keys': 'plugin:foo'}`).
        Not recommended for use.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping pipe IDs to key tuples when the server answers with a dictionary,
    otherwise a list of key tuples.
    """
    from meerschaum._internal.static import STATIC_CONFIG

    connector_keys = [] if connector_keys is None else connector_keys
    metric_keys = [] if metric_keys is None else metric_keys
    location_keys = [] if location_keys is None else location_keys
    tags = [] if tags is None else tags

    keys_url = STATIC_CONFIG['api']['endpoints']['pipes'] + '/keys'
    try:
        ### Serialization happens inside the `try` so that bad filters are reported via `error()`.
        query = {
            'connector_keys': json.dumps(connector_keys),
            'metric_keys': json.dumps(metric_keys),
            'location_keys': json.dumps(location_keys),
            'tags': json.dumps(tags),
            'params': json.dumps(params),
            'instance_keys': self.instance_keys,
            'as_dict': True,
        }
        response = self.get(keys_url, params=query, debug=debug)
        payload = response.json()
    except Exception as e:
        import traceback
        traceback.print_exc()
        error(str(e))

    if 'detail' in payload:
        error(payload['detail'], stack=False)

    if not isinstance(payload, dict):
        return [tuple(row) for row in payload]

    ### Numeric-looking IDs are cast back to integers (JSON object keys are always strings).
    keys_by_id = {}
    for pipe_id, pipe_keys in payload.items():
        parsed_id = int(pipe_id) if str(pipe_id).isdigit() else pipe_id
        keys_by_id[parsed_id] = tuple(pipe_keys)
    return keys_by_id

Fetch registered Pipes' keys from the API.

Parameters
  • connector_keys (Optional[List[str]], default None): The connector keys for the query.
  • metric_keys (Optional[List[str]], default None): The metric keys for the query.
  • location_keys (Optional[List[str]], default None): The location keys for the query.
  • tags (Optional[List[str]], default None): A list of tags for the query.
  • params (Optional[Dict[str, Any]], default None): A parameters dictionary for filtering against the pipes table (e.g. {'connector_keys': 'plugin:foo'}). Not recommended for use.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A dictionary mapping pipe IDs to key tuples, or a list of key tuples for older servers.
def edit_pipe( self, pipe: meerschaum.Pipe, patch: bool = False, debug: bool = False) -> Tuple[bool, str]:
 75def edit_pipe(
 76    self,
 77    pipe: mrsm.Pipe,
 78    patch: bool = False,
 79    debug: bool = False,
 80) -> SuccessTuple:
 81    """Submit a PATCH to the API to edit an existing Pipe object.
 82    Returns a tuple of (success_bool, response_dict).
 83    """
 84    from meerschaum.utils.debug import dprint
 85    ### NOTE: if `parameters` is supplied in the Pipe constructor,
 86    ###       then `pipe.parameters` will exist and not be fetched from the database.
 87    r_url = pipe_r_url(pipe)
 88    response = self.patch(
 89        r_url + '/edit',
 90        params={'patch': patch, 'instance_keys': self.get_pipe_instance_keys(pipe)},
 91        json=pipe.get_parameters(apply_symlinks=False),
 92        debug=debug,
 93    )
 94    if debug:
 95        dprint(response.text)
 96
 97    response_data = response.json()
 98
 99    if isinstance(response.json(), list):
100        response_tuple = response_data[0], response_data[1]
101    elif 'detail' in response.json():
102        response_tuple = response.__bool__(), response_data['detail']
103    else:
104        response_tuple = response.__bool__(), response.text
105    return response_tuple

Submit a PATCH to the API to edit an existing Pipe object. Returns a tuple of (success_bool, message).

def sync_pipe( self, pipe: meerschaum.Pipe, df: "Optional[Union['pd.DataFrame', Dict[Any, Any], str]]" = None, chunksize: Optional[int] = -1, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]] = None,
    chunksize: Optional[int] = -1,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Sync a DataFrame into a pipe by posting it to the API in chunks.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to sync into.

    df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]], default None
        The data to sync. A string is parsed as JSON first.
        Dictionaries are treated as a mapping of columns to lists of values,
        and lists are chunked as-is.

    chunksize: Optional[int], default -1
        The number of rows per request.
        `-1` reads the configured SQL chunksize and `None` posts one row per request.

    debug: bool, default False
        Verbosity toggle.

    **kw: Any
        Additional keyword arguments are forwarded as query parameters.

    Returns
    -------
    A `SuccessTuple` of (success, message).
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.dtypes import json_serialize_value
    from meerschaum.utils.misc import interval_str
    from meerschaum.config import get_config
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.dataframe import to_json
    begin = time.perf_counter()
    more_itertools = attempt_import('more_itertools')
    if df is None:
        msg = f"DataFrame is `None`. Cannot sync {pipe}."
        return False, msg

    def get_json_str(c):
        """Serialize a chunk (string, builtin container, or DataFrame) to a JSON string."""
        if isinstance(c, str):
            return c
        if isinstance(c, (dict, list, tuple)):
            return json.dumps(c, default=json_serialize_value)
        return to_json(c, orient='columns', geometry_format='wkb_hex')

    def get_chunk_rowcount(c) -> int:
        """Count the rows in a chunk (a dict chunk maps columns to lists of values)."""
        if isinstance(c, dict):
            return max((len(vals) for vals in c.values()), default=0)
        return len(c)

    df = json.loads(df) if isinstance(df, str) else df

    _chunksize: Optional[int] = (1 if chunksize is None else (
        get_config('system', 'connectors', 'sql', 'chunksize') if chunksize == -1
        else chunksize
    ))

    ### NOTE: Check the builtin containers first:
    ###       lists have an `index` method and would otherwise be mistaken for DataFrames.
    chunks = []
    if isinstance(df, dict):
        ### `chunks` becomes a list of dicts, each mapping every column to its slice of values,
        ### e.g. [{'a': [1, 2], 'b': [5, 6]}, {'a': [3, 4], 'b': [7, 8]}]
        keys: List[str] = list(df)
        for k in keys:
            for i, vals in enumerate(more_itertools.chunked(df[k], _chunksize)):
                if i < len(chunks):
                    chunks[i][k] = vals
                else:
                    chunks.append({k: vals})

    elif isinstance(df, list):
        chunks = more_itertools.chunked(df, _chunksize)

    elif hasattr(df, 'index'):
        df = df.reset_index(drop=True)
        is_dask = 'dask' in df.__module__
        chunks = (
            (df.iloc[i] for i in more_itertools.chunked(df.index, _chunksize))
            if not is_dask
            else [partition.compute() for partition in df.partitions]
        )

    else:
        return False, f"Cannot sync data of type '{type(df).__name__}' to {pipe}."

    ### Send columns in case the user has defined them locally.
    request_params = kw.copy()
    if pipe.columns:
        request_params['columns'] = json.dumps(pipe.columns)
    request_params['instance_keys'] = self.get_pipe_instance_keys(pipe)
    r_url = pipe_r_url(pipe) + '/data'

    rowcount = 0
    num_success_chunks = 0
    for i, c in enumerate(chunks):
        if debug:
            dprint(f"[{self}] Posting chunk {i} to {r_url}...")
        if len(c) == 0:
            if debug:
                dprint(f"[{self}] Skipping empty chunk...")
            continue
        json_str = get_json_str(c)

        try:
            response = self.post(
                r_url,
                params=request_params,
                data=json_str,
                debug=debug,
            )
        except Exception as e:
            msg = f"Failed to post a chunk to {pipe}:\n{e}"
            warn(msg)
            return False, msg

        if not response:
            return False, f"Failed to sync a chunk:\n{response.text}"

        try:
            j = json.loads(response.text)
        except Exception as e:
            return False, f"Failed to parse response from syncing {pipe}:\n{e}"

        if isinstance(j, dict) and 'detail' in j:
            return False, j['detail']

        try:
            j = tuple(j)
        except Exception:
            return False, response.text

        ### A success tuple needs both a flag and a message.
        if len(j) < 2:
            return False, response.text

        if debug:
            dprint("Received response: " + str(j))
        if not j[0]:
            return j

        rowcount += get_chunk_rowcount(c)
        num_success_chunks += 1

    self.delete_pipe_cache(pipe, debug=debug)
    success_tuple = True, (
        f"It took {interval_str(timedelta(seconds=(time.perf_counter() - begin)))} "
        + f"to sync {rowcount:,} row"
        + ('s' if rowcount != 1 else '')
        + f" across {num_success_chunks:,} chunk" + ('s' if num_success_chunks != 1 else '') +
        f" to {pipe}."
    )
    return success_tuple

Sync a DataFrame into a Pipe.

def delete_pipe( self, pipe: Optional[meerschaum.Pipe] = None, debug: bool = False) -> Tuple[bool, str]:
def delete_pipe(
    self,
    pipe: Optional[mrsm.Pipe] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Delete a pipe through the API's `/delete` endpoint.

    Parameters
    ----------
    pipe: Optional[mrsm.Pipe], default None
        The pipe to delete. `None` is reported via `error()`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of (success, message).
    """
    if pipe is None:
        error("Pipe cannot be None.")
    r_url = pipe_r_url(pipe)
    response = self.delete(
        r_url + '/delete',
        params={'instance_keys': self.get_pipe_instance_keys(pipe)},
        debug=debug,
    )
    if debug:
        dprint(response.text)

    ### Parse the body once; a non-JSON body becomes a failure tuple instead of an exception.
    try:
        response_data = response.json()
    except Exception:
        return False, f"Failed to parse the response from deleting {pipe}:\n{response.text}"

    if isinstance(response_data, list) and len(response_data) >= 2:
        return response_data[0], response_data[1]

    ### Guard the `detail` lookup so scalar or string payloads cannot raise.
    if isinstance(response_data, dict) and 'detail' in response_data:
        return bool(response), response_data['detail']

    return bool(response), response.text

Delete a Pipe and drop its table.

def delete_pipe_cache( self, pipe: meerschaum.Pipe, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def delete_pipe_cache(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Ask the API to invalidate its cache for a pipe (DELETE on `/cache`).

    Returns
    -------
    A `SuccessTuple`: the server's own list response converted to a tuple when available,
    otherwise the response's `ok` flag paired with its text.
    """
    cache_url = pipe_r_url(pipe) + '/cache'
    query = {'instance_keys': self.get_pipe_instance_keys(pipe)}
    response = self.delete(cache_url, params=query, debug=debug)

    if not response.ok:
        return False, f"Failed to invalidate cache for {pipe}: {response.text}"

    try:
        payload = response.json()
    except Exception:
        return response.ok, response.text

    if isinstance(payload, list):
        return tuple(payload)
    return response.ok, response.text

Invalidate the server-side cache for a pipe.

def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, as_chunks: bool = False, debug: bool = False, **kw: Any) -> Optional[pandas.DataFrame]:
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[str, datetime, int, None] = None,
    end: Union[str, datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    as_chunks: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union[pandas.DataFrame, None]:
    """
    Fetch a pipe's data from the API and parse it into a DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose data to fetch.

    select_columns: Optional[List[str]], default None
        Forwarded to the API (JSON-encoded) as `select_columns`.

    omit_columns: Optional[List[str]], default None
        Forwarded to the API (JSON-encoded) as `omit_columns`.

    begin: Union[str, datetime, int, None], default None
        Forwarded to the API as `begin`.

    end: Union[str, datetime, int, None], default None
        Forwarded to the API as `end`.

    params: Optional[Dict[str, Any]], default None
        Forwarded to the API (JSON-encoded, non-serializable values stringified) as `params`.

    as_chunks: bool, default False
        Forwarded to the API as `as_chunks`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The parsed DataFrame, or `None` if the request failed,
    the server reported an error, or the response could not be parsed.
    """
    r_url = pipe_r_url(pipe)
    try:
        response = self.get(
            r_url + "/data",
            params={
                'select_columns': json.dumps(select_columns),
                'omit_columns': json.dumps(omit_columns),
                'begin': begin,
                'end': end,
                'params': json.dumps(params, default=str),
                'instance': self.get_pipe_instance_keys(pipe),
                'as_chunks': as_chunks,
            },
            debug=debug
        )
        if not response.ok:
            return None
        j = response.json()
    except Exception as e:
        warn(f"Failed to get data for {pipe}:\n{e}")
        return None

    ### Server-side errors arrive as `{'detail': ...}`.
    ### Callers expect a DataFrame or `None`, never a tuple.
    if isinstance(j, dict) and 'detail' in j:
        warn(f"Failed to get data for {pipe}:\n{j['detail']}")
        return None

    from meerschaum.utils.dataframe import parse_df_datetimes, add_missing_cols_to_df
    from meerschaum.utils.dtypes import are_dtypes_equal
    try:
        df = parse_df_datetimes(
            j,
            ### Only columns registered as datetimes should be parsed as datetimes.
            ignore_cols=[
                col
                for col, dtype in pipe.dtypes.items()
                if not are_dtypes_equal(str(dtype), 'datetime')
            ],
            strip_timezone=(pipe.tzinfo is None),
            debug=debug,
        )
    except Exception as e:
        warn(f"Failed to parse response for {pipe}:\n{e}")
        return None

    ### An empty result still gets the pipe's registered columns.
    if len(df.columns) == 0:
        return add_missing_cols_to_df(df, pipe.dtypes)

    return df

Fetch data from the API.

def get_pipe_id( self, pipe: meerschaum.Pipe, debug: bool = False) -> Union[int, str, NoneType]:
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Union[int, str, None]:
    """
    Look up a pipe's ID through the API's `/id` endpoint.

    Returns
    -------
    The ID as an `int` when the response text is an integer,
    the raw text when it is non-empty and does not begin with `{`,
    otherwise `None`.
    """
    from meerschaum.utils.misc import is_int
    id_url = pipe_r_url(pipe) + '/id'
    query = {
        'instance': self.get_pipe_instance_keys(pipe),
    }
    response = self.get(id_url, params=query, debug=debug)
    if debug:
        dprint(f"Got pipe ID: {response.text}")

    try:
        text = response.text
        if is_int(text):
            return int(text)
        ### A leading brace means a JSON object came back rather than a bare ID.
        if text and not text.startswith('{'):
            return text
    except Exception as e:
        warn(f"Failed to get the ID for {pipe}:\n{e}")
    return None

Get a Pipe's ID from the API.

def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, Any]:
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    Fetch a pipe's attributes through the API's `/attributes` endpoint.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe whose attributes we are fetching.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The JSON-decoded response body.
    If the body cannot be decoded, warn and return an empty dictionary.
    """
    attributes_url = pipe_r_url(pipe) + '/attributes'
    query = {
        'instance': self.get_pipe_instance_keys(pipe),
    }
    response = self.get(attributes_url, params=query, debug=debug)
    try:
        attributes = json.loads(response.text)
    except Exception as e:
        warn(f"Failed to get the attributes for {pipe}:\n{e}")
        return {}
    return attributes

Get a Pipe's attributes from the API

Parameters
Returns
  • A dictionary of a pipe's attributes.
  • If the pipe does not exist, return an empty dictionary.
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, debug: bool = False) -> Union[datetime.datetime, int, NoneType]:
def get_sync_time(
    self,
    pipe: mrsm.Pipe,
    params: Optional[Dict[str, Any]] = None,
    newest: bool = True,
    debug: bool = False,
) -> Union[datetime, int, None]:
    """
    Fetch a pipe's sync time through the API's `/sync_time` endpoint.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe to select from.

    params: Optional[Dict[str, Any]], default None
        Optional params dictionary, sent as the JSON body of the request.

    newest: bool, default True
        Forwarded to the API: `True` asks for the most recent value,
        `False` for the oldest.

    debug: bool, default False
        Verbosity toggle (also forwarded to the API).

    Returns
    -------
    An `int` if the response is an integer, a `datetime` parsed from an ISO string,
    or `None` if the request failed, the server returned null, or parsing failed.
    """
    from meerschaum.utils.misc import is_int
    from meerschaum.utils.warnings import warn
    sync_time_url = pipe_r_url(pipe) + '/sync_time'
    query = {
        'instance': self.get_pipe_instance_keys(pipe),
        'newest': newest,
        'debug': debug,
    }
    response = self.get(sync_time_url, json=params, params=query, debug=debug)
    if not response:
        warn(f"Failed to get the sync time for {pipe}:\n" + response.text)
        return None

    j = response.json()
    if j is None:
        return None

    try:
        return int(j) if is_int(j) else datetime.fromisoformat(j)
    except Exception as e:
        warn(f"Failed to parse the sync time '{j}' for {pipe}:\n{e}")
        return None

Get a Pipe's most recent datetime value from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe to select from.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause.
  • newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
Returns
  • The most recent (or oldest if newest is False) datetime of a pipe,
  • rounded down to the closest minute.
def pipe_exists(self, pipe: meerschaum.Pipe, debug: bool = False) -> bool:
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> bool:
    """
    Check the API to see if a pipe exists.

    Parameters
    ----------
    pipe: 'meerschaum.Pipe'
        The pipe which we are querying.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A bool built from the API's `/exists` response.
    Failed requests, unparsable bodies, and `detail` error payloads all yield `False`.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.warnings import warn
    r_url = pipe_r_url(pipe)
    response = self.get(
        r_url + '/exists',
        params={
            'instance': self.get_pipe_instance_keys(pipe),
        },
        debug=debug,
    )
    if not response:
        warn(f"Failed to check if {pipe} exists:\n{response.text}")
        return False
    if debug:
        dprint("Received response: " + str(response.text))

    try:
        j = response.json()
    except Exception as e:
        warn(f"Failed to check if {pipe} exists:\n{e}")
        return False

    ### An error payload is a non-empty (truthy) dictionary,
    ### so it must not be returned as the answer.
    if isinstance(j, dict) and 'detail' in j:
        warn(j['detail'])
        return False

    return bool(j)

Check the API to see if a Pipe exists.

Parameters
Returns
  • A bool indicating whether a pipe's underlying table exists.
def create_metadata(self, debug: bool = False) -> bool:
def create_metadata(
    self,
    debug: bool = False
) -> bool:
    """
    Ask the API to create its metadata tables (POST to the `metadata` endpoint).

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    `True` if the request succeeded and the response body is valid JSON, else `False`.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = STATIC_CONFIG['api']['endpoints']['metadata']
    response = self.post(r_url, debug=debug)
    if debug:
        dprint(f"Create metadata response: {response.text}")

    if not response:
        warn(f"Failed to create metadata on {self}:\n{response.text}")
        return False

    try:
        _ = json.loads(response.text)
    except Exception as e:
        warn(f"Failed to create metadata on {self}:\n{e}")
        return False

    return True

Create metadata tables.

Returns
  • A bool indicating success.
def get_pipe_rowcount( self, pipe: meerschaum.Pipe, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, remote: bool = False, debug: bool = False) -> int:
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Union[str, datetime, int, None] = None,
    end: Union[str, datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    remote: bool = False,
    debug: bool = False,
) -> int:
    """
    Fetch a pipe's row count through the API's `/rowcount` endpoint.

    Parameters
    ----------
    pipe: 'meerschaum.Pipe'
        The pipe whose rows are being counted.

    begin: Union[str, datetime, int, None], default None
        If provided, bound the count by this datetime.

    end: Union[str, datetime, int, None], default None
        If provided, bound the count by this datetime.

    params: Optional[Dict[str, Any]], default None
        If provided, bound the count by these parameters (sent as the JSON body).

    remote: bool, default False
        If `True`, return the rowcount for the fetch definition.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The row count reported by the API, bounded by the given parameters.
    If the request fails or the response is not an integer, warn and return 0.
    """
    rowcount_url = pipe_r_url(pipe) + "/rowcount"
    query = {
        'begin': begin,
        'end': end,
        'remote': remote,
        'instance': self.get_pipe_instance_keys(pipe),
    }
    response = self.get(rowcount_url, json=params, params=query, debug=debug)
    if not response:
        warn(f"Failed to get the rowcount for {pipe}:\n{response.text}")
        return 0

    try:
        rowcount = int(json.loads(response.text))
    except Exception as e:
        warn(f"Failed to get the rowcount for {pipe}:\n{e}")
        return 0
    return rowcount

Get a pipe's row count from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose rows we are counting.
  • begin (Union[str, datetime, int, None], default None): If provided, bound the count by this datetime.
  • end (Union[str, datetime, int, None], default None): If provided, bound the count by this datetime.
  • params (Optional[Dict[str, Any]], default None): If provided, bound the count by these parameters.
  • remote (bool, default False): If True, return the rowcount for the fetch definition.
Returns
  • The number of rows in the pipe's table, bounded by the given parameters.
  • If the table does not exist, return 0.
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> SuccessTuple:
    """
    Drop a pipe's table but maintain its registration.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe to be dropped. `None` is reported via `error()`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A success tuple (bool, str).
    """
    from meerschaum.utils.warnings import error
    from meerschaum.utils.debug import dprint
    if pipe is None:
        error("Pipe cannot be None.")
    r_url = pipe_r_url(pipe)
    response = self.delete(
        r_url + '/drop',
        params={
            'instance': self.get_pipe_instance_keys(pipe),
        },
        debug=debug,
    )
    if debug:
        dprint(response.text)

    try:
        data = response.json()
    except Exception:
        return False, f"Failed to drop {pipe}."

    ### The server is expected to answer with a `[success, message]` pair.
    if isinstance(data, list) and len(data) >= 2:
        return data[0], data[1]

    ### Reuse the parsed body and guard the `detail` lookup
    ### so scalar or string payloads cannot raise.
    if isinstance(data, dict) and 'detail' in data:
        return bool(response), data['detail']

    return bool(response), response.text

Drop a pipe's table but maintain its registration.

Parameters
Returns
  • A success tuple (bool, str).
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[str, datetime, int, None] = None,
    end: Union[str, datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    Delete rows in a pipe's table.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe with rows to be deleted.

    begin: Union[str, datetime, int, None], default None
        Forwarded to the API as `begin`.

    end: Union[str, datetime, int, None], default None
        Forwarded to the API as `end`.

    params: Optional[Dict[str, Any]], default None
        Forwarded to the API (JSON-encoded) as `params`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A success tuple.
    """
    r_url = pipe_r_url(pipe)
    response = self.delete(
        r_url + '/clear',
        params={
            'begin': begin,
            'end': end,
            ### Stringify values like datetimes, matching how the read endpoints encode `params`.
            'params': json.dumps(params, default=str),
            'instance': self.get_pipe_instance_keys(pipe),
        },
        debug=debug,
    )
    if debug:
        dprint(response.text)

    try:
        data = response.json()
    except Exception:
        return False, f"Failed to clear {pipe} with constraints {begin=}, {end=}, {params=}."

    ### The server is expected to answer with a `[success, message]` pair.
    if isinstance(data, list) and len(data) >= 2:
        return data[0], data[1]

    ### Reuse the parsed body and guard the `detail` lookup
    ### so scalar or string payloads cannot raise.
    if isinstance(data, dict) and 'detail' in data:
        return bool(response), data['detail']

    return bool(response), response.text

Delete rows in a pipe's table.

Parameters
Returns
  • A success tuple.
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False) -> Optional[Dict[str, str]]:
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Union[Dict[str, str], None]:
    """
    Fetch the columns and types of the pipe's table from the API.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe whose columns are to be queried.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping column names to their database types,
    or `None` (with a warning) if the response is not a dictionary
    or only contains a `detail` message.

    Examples
    --------
    >>> {
    ...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
    ...   'id': 'BIGINT',
    ...   'val': 'DOUBLE PRECISION',
    ... }
    >>>
    """
    types_url = pipe_r_url(pipe) + '/columns/types'
    query = {
        'instance': self.get_pipe_instance_keys(pipe),
    }
    response = self.get(types_url, params=query, debug=debug)
    payload = response.json()

    if not isinstance(payload, dict):
        warn(response.text)
        return None

    ### A lone `detail` key is an error message rather than a column.
    if len(payload) == 1 and 'detail' in payload:
        warn(payload['detail'])
        return None

    return payload

Fetch the columns and types of the pipe's table.

Parameters
Returns
  • A dictionary mapping column names to their database types.
Examples
>>> {
...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
...   'id': 'BIGINT',
...   'val': 'DOUBLE PRECISION',
... }
>>>
def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Optional[Dict[str, str]]:
def get_pipe_columns_indices(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Union[Dict[str, str], None]:
    """
    Fetch the index information for a pipe from the API.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose columns are to be queried.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping column names to a list of associated index information,
    or `None` (with a warning) if the response is not a dictionary
    or only contains a `detail` message.
    """
    indices_url = pipe_r_url(pipe) + '/columns/indices'
    query = {
        'instance': self.get_pipe_instance_keys(pipe),
    }
    response = self.get(indices_url, params=query, debug=debug)
    payload = response.json()

    if not isinstance(payload, dict):
        warn(response.text)
        return None

    ### A lone `detail` key is an error message rather than a column.
    if len(payload) == 1 and 'detail' in payload:
        warn(payload['detail'])
        return None

    return payload

Fetch the index information for a pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe whose columns to be queried.
Returns
  • A dictionary mapping column names to a list of associated index information.
def get_pipe_docs( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: str = 'asc', limit: Optional[int] = None, debug: bool = False, **kw: Any) -> List[Dict[str, Any]]:
def get_pipe_docs(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[str, datetime, int, None] = None,
    end: Union[str, datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    order: str = 'asc',
    limit: Optional[int] = None,
    debug: bool = False,
    **kw: Any
) -> List[Dict[str, Any]]:
    """
    Fetch a pipe's data as a list of documents through the API's `/docs` endpoint.

    Returns
    -------
    The list decoded from the response.
    Failed requests, exceptions, and non-list payloads all yield an empty list.
    """
    r_url = pipe_r_url(pipe)
    try:
        query = {
            'select_columns': json.dumps(select_columns),
            'omit_columns': json.dumps(omit_columns),
            'begin': begin,
            'end': end,
            'params': json.dumps(params, default=str),
            'order': order,
            'limit': limit,
            'instance_keys': self.get_pipe_instance_keys(pipe),
        }
        response = self.get(r_url + "/docs", params=query, debug=debug)
        if response.ok:
            docs = response.json()
            return docs if isinstance(docs, list) else []

        warn(f"Failed to get docs for {pipe}:\n{response.text}")
        return []
    except Exception as e:
        warn(f"Failed to get docs for {pipe}:\n{e}")
        return []

Fetch a pipe's data as a list of documents from the API.

def get_pipe_size( self, pipe: meerschaum.Pipe, debug: bool = False, **kw: Any) -> Optional[int]:
def get_pipe_size(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kw: Any
) -> Union[int, None]:
    """
    Return the on-disk size of a pipe's target table in bytes via the API.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table size to measure.

    debug: bool, default False
        Passed through to the underlying GET request.

    Returns
    -------
    An `int` of the number of bytes occupied by the target table,
    or `None` if the request fails, the body cannot be parsed,
    the API answers with a `detail` message, or the value is not integer-like.
    """
    r_url = pipe_r_url(pipe) + '/size'
    response = self.get(
        r_url,
        params={
            ### Use the same `instance_keys` query key as the other pipe endpoints.
            'instance_keys': self.get_pipe_instance_keys(pipe),
        },
        debug=debug,
    )
    if not response:
        warn(f"Failed to get the size for {pipe}:\n{response.text}")
        return None

    try:
        size = json.loads(response.text)
    except Exception as e:
        warn(f"Failed to parse the size for {pipe}:\n{e}")
        return None

    if size is None:
        return None

    ### A dictionary with `detail` is an error message, not a size.
    if isinstance(size, dict) and 'detail' in size:
        warn(size['detail'])
        return None

    try:
        return int(size)
    except Exception:
        return None

Return the on-disk size of a pipe's target table in bytes via the API.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table size to measure.
Returns
  • An int of the number of bytes occupied by the target table, or None if the size could not be determined.
def compress_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def compress_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Ask the API to compress a pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to compress.

    debug: bool, default False
        If `True`, print the raw response text.

    Returns
    -------
    A `SuccessTuple`: the first two items of the response if it is a JSON list,
    otherwise the response's truthiness paired with its `detail` field or raw text.
    Returns a failure tuple if the body is not valid JSON.
    """
    endpoint = pipe_r_url(pipe) + '/compress'
    query = {'instance_keys': self.get_pipe_instance_keys(pipe)}
    response = self.post(endpoint, params=query, debug=debug)
    if debug:
        dprint(response.text)

    try:
        payload = response.json()
    except Exception:
        return False, f"Failed to compress {pipe}."

    ### A JSON list is treated as an already-formed (success, message) pair.
    if isinstance(payload, list):
        return payload[0], payload[1]

    message = (
        payload['detail']
        if isinstance(payload, dict) and 'detail' in payload
        else response.text
    )
    return bool(response), message

Compress a pipe's target table via the API.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to compress.
Returns
  • A SuccessTuple indicating success.
def decompress_pipe( self, pipe: meerschaum.Pipe, no_policy: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 969def decompress_pipe(
 970    self,
 971    pipe: mrsm.Pipe,
 972    no_policy: bool = False,
 973    debug: bool = False,
 974    **kw: Any
 975) -> SuccessTuple:
 976    """
 977    Decompress a pipe's target table via the API, the inverse of `compress_pipe()`.
 978
 979    Parameters
 980    ----------
 981    pipe: mrsm.Pipe
 982        The pipe whose target table to decompress.
 983
 984    no_policy: bool, default False
 985        If `True`, decompress existing data now but leave the compression policy in place.
 986
 987    Returns
 988    -------
 989    A `SuccessTuple` indicating success.
 990    """
 991    r_url = pipe_r_url(pipe) + '/decompress'
 992    response = self.post(
 993        r_url,
 994        params={
 995            'instance_keys': self.get_pipe_instance_keys(pipe),
 996            'no_policy': no_policy,
 997        },
 998        debug=debug,
 999    )
1000    if debug:
1001        dprint(response.text)
1002
1003    try:
1004        data = response.json()
1005    except Exception:
1006        return False, f"Failed to decompress {pipe}."
1007
1008    if isinstance(data, list):
1009        return data[0], data[1]
1010    if isinstance(data, dict) and 'detail' in data:
1011        return response.__bool__(), data['detail']
1012    return response.__bool__(), response.text

Decompress a pipe's target table via the API, the inverse of compress_pipe().

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to decompress.
  • no_policy (bool, default False): If True, decompress existing data now but leave the compression policy in place.
Returns
  • A SuccessTuple indicating success.
def vacuum_pipe( self, pipe: meerschaum.Pipe, full: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def vacuum_pipe(
    self,
    pipe: mrsm.Pipe,
    full: bool = False,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Ask the API to vacuum a pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to vacuum.

    full: bool, default False
        If `True`, run `VACUUM FULL` (PostgreSQL family only).
        Sent as the `full` query parameter.

    debug: bool, default False
        If `True`, print the raw response text.

    Returns
    -------
    A `SuccessTuple`: the first two items of the response if it is a JSON list,
    otherwise the response's truthiness paired with its `detail` field or raw text.
    Returns a failure tuple if the body is not valid JSON.
    """
    endpoint = pipe_r_url(pipe) + '/vacuum'
    query = {
        'instance_keys': self.get_pipe_instance_keys(pipe),
        'full': full,
    }
    response = self.post(endpoint, params=query, debug=debug)
    if debug:
        dprint(response.text)

    try:
        payload = response.json()
    except Exception:
        return False, f"Failed to vacuum {pipe}."

    ### A JSON list is treated as an already-formed (success, message) pair.
    if isinstance(payload, list):
        return payload[0], payload[1]

    message = (
        payload['detail']
        if isinstance(payload, dict) and 'detail' in payload
        else response.text
    )
    return bool(response), message

Vacuum a pipe's target table via the API.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to vacuum.
  • full (bool, default False): If True, run VACUUM FULL (PostgreSQL family only).
Returns
  • A SuccessTuple indicating success.
def analyze_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def analyze_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Ask the API to analyze a pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table to analyze.

    debug: bool, default False
        If `True`, print the raw response text.

    Returns
    -------
    A `SuccessTuple`: the first two items of the response if it is a JSON list,
    otherwise the response's truthiness paired with its `detail` field or raw text.
    Returns a failure tuple if the body is not valid JSON.
    """
    endpoint = pipe_r_url(pipe) + '/analyze'
    query = {'instance_keys': self.get_pipe_instance_keys(pipe)}
    response = self.post(endpoint, params=query, debug=debug)
    if debug:
        dprint(response.text)

    try:
        payload = response.json()
    except Exception:
        return False, f"Failed to analyze {pipe}."

    ### A JSON list is treated as an already-formed (success, message) pair.
    if isinstance(payload, list):
        return payload[0], payload[1]

    message = (
        payload['detail']
        if isinstance(payload, dict) and 'detail' in payload
        else response.text
    )
    return bool(response), message

Analyze a pipe's target table via the API.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table to analyze.
Returns
  • A SuccessTuple indicating success.
def partition_pipe( self, pipe: meerschaum.Pipe, chunk_minutes: Optional[int] = None, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def partition_pipe(
    self,
    pipe: mrsm.Pipe,
    chunk_minutes: Optional[int] = None,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Ask the API to repartition a pipe's target table to a new chunk width.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The partitioned pipe whose target table to repartition.

    chunk_minutes: Optional[int], default None
        The new partition width in minutes. Defaults to the pipe's `verify.chunk_minutes`.
        Only sent as a query parameter when it is not `None`.

    debug: bool, default False
        If `True`, print the raw response text.

    Returns
    -------
    A `SuccessTuple`: the first two items of the response if it is a JSON list,
    otherwise the response's truthiness paired with its `detail` field or raw text.
    Returns a failure tuple if the body is not valid JSON.
    """
    endpoint = pipe_r_url(pipe) + '/partition'
    query = {'instance_keys': self.get_pipe_instance_keys(pipe)}
    ### Omit the key entirely rather than sending an explicit null.
    if chunk_minutes is not None:
        query['chunk_minutes'] = chunk_minutes
    response = self.post(endpoint, params=query, debug=debug)
    if debug:
        dprint(response.text)

    try:
        payload = response.json()
    except Exception:
        return False, f"Failed to repartition {pipe}."

    ### A JSON list is treated as an already-formed (success, message) pair.
    if isinstance(payload, list):
        return payload[0], payload[1]

    message = (
        payload['detail']
        if isinstance(payload, dict) and 'detail' in payload
        else response.text
    )
    return bool(response), message

Repartition a pipe's target table to a new chunk width via the API.

Parameters
  • pipe (mrsm.Pipe): The partitioned pipe whose target table to repartition.
  • chunk_minutes (Optional[int], default None): The new partition width in minutes. Defaults to the pipe's verify.chunk_minutes.
Returns
  • A SuccessTuple indicating success.
def fetch( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, str, int] = '', end: Union[datetime.datetime, int] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw: Any) -> "Iterator['pd.DataFrame']":
def fetch(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, str, int] = '',
    end: Union[datetime, int] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kw: Any
) -> Iterator['pd.DataFrame']:
    """
    Get the Pipe data from the remote Pipe.

    The source pipe is described by the `fetch:pipe` keys of `pipe.parameters`
    (or, for legacy configurations, by `connector_keys`, `metric_key`
    and `location_key` directly under `fetch`).

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose `fetch` parameters describe the source pipe.

    begin: Union[datetime, str, int], default ''
        Passed to the source pipe's `get_data()`.

    end: Union[datetime, int], default None
        Passed to the source pipe's `get_data()`.

    params: Optional[Dict[str, Any]], default None
        Deep-copied and patched with `fetch:params` before being passed to `get_data()`.

    Returns
    -------
    The result of the source pipe's `get_data(as_iterator=True)`,
    or `None` (after a warning) if the `fetch` parameters are missing or incomplete.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.config._patch import apply_patch_to_config

    fetch_params = pipe.parameters.get('fetch', {})
    if not fetch_params:
        warn(f"Missing 'fetch' parameters for {pipe}.", stack=False)
        return None

    ### Copy so that adding `instance` below never writes the connector
    ### object into the pipe's own parameters dictionary.
    pipe_meta = dict(fetch_params.get('pipe', None) or {})

    ### Legacy: check for `connector_keys`, etc. at the root.
    if not pipe_meta:
        ck = fetch_params.get('connector_keys', None)
        mk = fetch_params.get('metric_key', None)
        lk = fetch_params.get('location_key', None)
        if not ck or not mk:
            warn(f"Missing `fetch:pipe` keys for {pipe}.", stack=False)
            return None

        pipe_meta.update({
            'connector': ck,
            'metric': mk,
            'location': lk,
        })

    pipe_meta['instance'] = self
    source_pipe = mrsm.Pipe(**pipe_meta)

    _params = copy.deepcopy(params) if params is not None else {}
    _params = apply_patch_to_config(_params, fetch_params.get('params', {}))
    select_columns = fetch_params.get('select_columns', [])
    omit_columns = fetch_params.get('omit_columns', [])

    return source_pipe.get_data(
        select_columns=select_columns,
        omit_columns=omit_columns,
        begin=begin,
        end=end,
        params=_params,
        debug=debug,
        as_iterator=True,
    )

Get the Pipe data from the remote Pipe.

def register_plugin( self, plugin: meerschaum.Plugin, make_archive: bool = True, debug: bool = False) -> Tuple[bool, str]:
def register_plugin(
    self,
    plugin: mrsm.core.Plugin,
    make_archive: bool = True,
    debug: bool = False,
) -> SuccessTuple:
    """
    Register a plugin and upload its archive.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to register. Its `version` and JSON-encoded `attributes`
        are sent as query parameters alongside the archive.

    make_archive: bool, default True
        If `True`, build the archive with `plugin.make_tar()`.
        Otherwise upload the file at `plugin.archive_path`.

    Returns
    -------
    A `SuccessTuple` decoded from the response,
    or `(False, <response text>)` if the response is not a two-item JSON value.
    """
    import json
    archive_path = plugin.make_tar(debug=debug) if make_archive else plugin.archive_path
    metadata = {
        'version': plugin.version,
        'attributes': json.dumps(plugin.attributes),
    }
    r_url = plugin_r_url(plugin)

    ### The context manager guarantees the archive is closed on every path.
    with open(archive_path, 'rb') as archive_file:
        try:
            response = self.post(
                r_url,
                files={'archive': archive_file},
                params=metadata,
                debug=debug,
            )
        except Exception:
            return False, f"Failed to register plugin '{plugin}'."

    try:
        success, msg = json.loads(response.text)
    except Exception:
        return False, response.text

    return success, msg

Register a plugin and upload its archive.

def install_plugin( self, name: str, skip_deps: bool = False, force: bool = False, debug: bool = False) -> Tuple[bool, str]:
def install_plugin(
    self,
    name: str,
    skip_deps: bool = False,
    force: bool = False,
    debug: bool = False
) -> SuccessTuple:
    """
    Download and attempt to install a plugin from the API.

    Parameters
    ----------
    name: str
        The name of the plugin to download.

    skip_deps: bool, default False
        Passed to `Plugin.install()`.

    force: bool, default False
        Passed to `Plugin.install()`.

    Returns
    -------
    The `SuccessTuple` from `Plugin.install()` if a binary archive was downloaded.
    If the download is not binary, return the `SuccessTuple` it contains as a JSON list,
    or a failure tuple for any other content.
    """
    import os
    import pathlib
    import json
    from meerschaum.core import Plugin
    import meerschaum.config.paths as paths
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    binaryornot_check = attempt_import('binaryornot.check', lazy=False)
    r_url = plugin_r_url(name)
    dest = pathlib.Path(os.path.join(paths.PLUGINS_TEMP_RESOURCES_PATH, name + '.tar.gz'))
    if debug:
        dprint(f"Fetching from '{self.url + r_url}' to '{dest}'...")
    archive_path = self.wget(r_url, dest, debug=debug)
    is_binary = binaryornot_check.is_binary(str(archive_path))
    if not is_binary:
        ### A non-binary download is an error payload rather than an archive.
        fail_msg = f"Failed to download binary for plugin '{name}'."
        ### Default to failure so that unexpected JSON shapes
        ### (including `detail` dictionaries) cannot leave these unbound.
        success, msg = False, fail_msg
        try:
            with open(archive_path, 'r') as f:
                j = json.load(f)
            if isinstance(j, list):
                success, msg = tuple(j)
        except Exception:
            success, msg = False, fail_msg
        return success, msg
    plugin = Plugin(name, archive_path=archive_path, repo_connector=self)
    return plugin.install(skip_deps=skip_deps, force=force, debug=debug)

Download and attempt to install a plugin from the API.

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
def delete_plugin(
    self,
    plugin: mrsm.core.Plugin,
    debug: bool = False
) -> SuccessTuple:
    """
    Delete a plugin from an API repository.

    Returns
    -------
    The `SuccessTuple` decoded from the response, a failure tuple if the request raises,
    or `(False, <response text>)` if the response cannot be unpacked into two items.
    """
    import json
    endpoint = plugin_r_url(plugin)
    try:
        response = self.delete(endpoint, debug=debug)
    except Exception:
        return False, f"Failed to delete plugin '{plugin}'."

    try:
        deleted, message = json.loads(response.text)
    except Exception:
        return False, response.text
    return deleted, message

Delete a plugin from an API repository.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False) -> List[str]:
 93def get_plugins(
 94    self,
 95    user_id: Optional[int] = None,
 96    search_term: Optional[str] = None,
 97    debug: bool = False
 98) -> List[str]:
 99    """Return a list of registered plugin names.
100
101    Parameters
102    ----------
103    user_id: Optional[int], default None
104        If specified, return all plugins from a certain user.
105
106    search_term: Optional[str], default None
107        If specified, return plugins beginning with this string.
108
109    Returns
110    -------
111    A list of plugin names.
112    """
113    import json
114    from meerschaum.utils.warnings import error
115    from meerschaum._internal.static import STATIC_CONFIG
116    response = self.get(
117        STATIC_CONFIG['api']['endpoints']['plugins'],
118        params = {'user_id': user_id, 'search_term': search_term},
119        use_token = True,
120        debug = debug
121    )
122    if not response:
123        return []
124    plugins = json.loads(response.text)
125    if not isinstance(plugins, list):
126        error(response.text)
127    return plugins

Return a list of registered plugin names.

Parameters
  • user_id (Optional[int], default None): If specified, return all plugins from a certain user.
  • search_term (Optional[str], default None): If specified, return plugins beginning with this string.
Returns
  • A list of plugin names.
def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
def get_plugin_attributes(
    self,
    plugin: mrsm.core.Plugin,
    debug: bool = False
) -> Dict[str, Any]:
    """
    Return a plugin's attributes.

    Returns
    -------
    The attributes dictionary decoded from the response,
    or an empty dictionary (after a warning) if the request failed with a `detail` message.
    A payload which is not a dictionary is reported through `error()`.
    """
    import json
    from meerschaum.utils.warnings import warn, error

    response = self.get(plugin_r_url(plugin) + '/attributes', use_token=True, debug=debug)
    attributes = response.json()

    ### The payload may be a JSON object that was itself encoded as a string.
    if isinstance(attributes, str) and attributes.startswith('{'):
        try:
            attributes = json.loads(attributes)
        except Exception:
            pass

    if not isinstance(attributes, dict):
        error(response.text)
        return attributes

    if not response and 'detail' in attributes:
        warn(attributes['detail'])
        return {}

    return attributes

Return a plugin's attributes.

def login( self, debug: bool = False, warn: bool = True, **kw: Any) -> Tuple[bool, str]:
def login(
    self,
    debug: bool = False,
    warn: bool = True,
    **kw: Any
) -> SuccessTuple:
    """
    Log in and set the session token.

    For the `api_key` scheme, the key is only validated against the tokens endpoint.
    For the `password` and `client_credentials` schemes, the credentials are posted
    to the login endpoint and the returned token and expiration are stored
    on `self._token` and `self._expires`.

    Parameters
    ----------
    debug: bool, default False
        Passed through to the underlying requests.

    warn: bool, default True
        If `True`, emit a warning the first time a login attempt fails.

    Returns
    -------
    A `SuccessTuple` indicating whether the login (or key validation) succeeded.
    A failure tuple is returned without a request if credentials are missing
    or the login scheme is not recognized.
    """
    if self.login_scheme == 'api_key':
        validate_response = self.post(
            STATIC_CONFIG['api']['endpoints']['tokens'] + '/validate',
            headers={'Authorization': f'Bearer {self.api_key}'},
            use_token=False,
            debug=debug,
        )
        if not validate_response:
            return False, "API key is not valid."
        return True, "API key is valid."

    ### Start empty so an unrecognized scheme falls through to the prompt below
    ### instead of leaving `login_data` unbound.
    login_data = {}
    try:
        if self.login_scheme == 'password':
            login_data = {
                'username': self.username,
                'password': self.password,
            }
        elif self.login_scheme == 'client_credentials':
            login_data = {
                'client_id': self.client_id,
                'client_secret': self.client_secret,
            }
    except AttributeError:
        login_data = {}

    if not login_data:
        return False, f"Please login with the command `login {self}`."

    ### Only the `password` scheme carries a username to report.
    login_scheme_msg = (
        f" as user '{login_data['username']}'"
        if self.login_scheme == 'password'
        else ''
    )

    response = self.post(
        STATIC_CONFIG['api']['endpoints']['login'],
        data=login_data,
        use_token=False,
        debug=debug,
    )
    if response:
        msg = f"Successfully logged into '{self}'{login_scheme_msg}."
        token_payload = json.loads(response.text)
        self._token = token_payload['access_token']
        self._expires = datetime.datetime.strptime(
            token_payload['expires'],
            '%Y-%m-%dT%H:%M:%S.%f'
        )
    else:
        msg = (
            f"Failed to log into '{self}'{login_scheme_msg}.\n" +
            f"    Please verify login details for connector '{self}'."
        )
        ### Only warn once per connector.
        if warn and not self.__dict__.get('_emitted_warning', False):
            _warn(msg, stack=False)
            self._emitted_warning = True

    return bool(response), msg

Log in and set the session token.

def test_connection(self, **kw: Any) -> Optional[bool]:
 85def test_connection(
 86    self,
 87    **kw: Any
 88) -> Union[bool, None]:
 89    """Test if a successful connection to the API may be made."""
 90    from meerschaum.connectors.poll import retry_connect
 91    _default_kw = {
 92        'max_retries': 1, 'retry_wait': 0, 'warn': False,
 93        'connector': self, 'enforce_chaining': False,
 94        'enforce_login': False,
 95    }
 96    _default_kw.update(kw)
 97    try:
 98        return retry_connect(**_default_kw)
 99    except Exception:
100        return False

Test if a successful connection to the API may be made.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def register_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Register a new user.

    The username, password and JSON-encoded attributes are always posted;
    `type` and `email` are only included when they are truthy.

    Returns
    -------
    The `SuccessTuple` decoded from the response,
    or a failure tuple if the API returns a `detail` message or an unusable body.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    users_endpoint = STATIC_CONFIG['api']['endpoints']['users']
    r_url = f"{users_endpoint}/register"

    payload = {
        'username': user.username,
        'password': user.password,
        'attributes': json.dumps(user.attributes),
    }
    if user.type:
        payload['type'] = user.type
    if user.email:
        payload['email'] = user.email

    response = self.post(r_url, data=payload, debug=debug)
    try:
        parsed = json.loads(response.text)
        if isinstance(parsed, dict) and 'detail' in parsed:
            return False, parsed['detail']
        return tuple(parsed)
    except Exception:
        if response:
            return False, response.text
        return False, f"Failed to register user '{user}'."

Register a new user.

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Union[int, str, uuid.UUID, NoneType]:
def get_user_id(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> Union[int, str, UUID, None]:
    """
    Get a user's ID.

    Extra keyword arguments are forwarded to the GET request.

    Returns
    -------
    The ID as an `int` or `UUID` when its text looks like one, otherwise as a `str`.
    Returns `None` if the response cannot be parsed.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    from meerschaum.utils.misc import is_int, is_uuid
    users_endpoint = STATIC_CONFIG['api']['endpoints']['users']
    response = self.get(f"{users_endpoint}/{user.username}/id", debug=debug, **kw)
    try:
        id_text = str(json.loads(response.text))
        if is_int(id_text):
            return int(id_text)
        if is_uuid(id_text):
            return UUID(id_text)
        return id_text
    except Exception:
        return None

Get a user's ID.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
def get_users(
    self,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Return a list of registered usernames.

    Returns
    -------
    The decoded JSON response, or an empty list if the request fails
    or the body cannot be decoded.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    users_url = f"{STATIC_CONFIG['api']['endpoints']['users']}"
    response = self.get(users_url, debug=debug, use_token=True)
    if not response:
        return []

    try:
        usernames = response.json()
    except Exception:
        usernames = []
    return usernames

Return a list of registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def edit_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Edit an existing user.

    The username, password, type, email and JSON-encoded attributes are posted
    to the users `edit` endpoint.

    Returns
    -------
    The `SuccessTuple` decoded from the response,
    or a failure tuple if the API returns a `detail` message or an unusable body.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    users_endpoint = STATIC_CONFIG['api']['endpoints']['users']
    r_url = f"{users_endpoint}/edit"

    payload = {
        'username': user.username,
        'password': user.password,
        'type': user.type,
        'email': user.email,
        'attributes': json.dumps(user.attributes),
    }
    response = self.post(r_url, data=payload, debug=debug)
    try:
        parsed = json.loads(response.text)
        if isinstance(parsed, dict) and 'detail' in parsed:
            return False, parsed['detail']
        return tuple(parsed)
    except Exception:
        if response:
            return False, response.text
        return False, f"Failed to edit user '{user}'."

Edit an existing user.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
def delete_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Delete a user.

    Returns
    -------
    The `SuccessTuple` decoded from the response,
    or a failure tuple if the API returns a `detail` message or an unusable body.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    users_endpoint = STATIC_CONFIG['api']['endpoints']['users']
    response = self.delete(f"{users_endpoint}/{user.username}", debug=debug)
    try:
        parsed = json.loads(response.text)
        if isinstance(parsed, dict) and 'detail' in parsed:
            return False, parsed['detail']
        return tuple(parsed)
    except Exception:
        return False, f"Failed to delete user '{user.username}'."

Delete a user.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
def get_user_password_hash(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """
    If configured, get a user's password hash.

    Extra keyword arguments are forwarded to the GET request.

    Returns
    -------
    The decoded JSON response, or `None` if the request fails.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    url_parts = (
        STATIC_CONFIG['api']['endpoints']['users'],
        user.username,
        'password_hash',
    )
    response = self.get('/'.join(url_parts), debug=debug, **kw)
    return response.json() if response else None

If configured, get a user's password hash.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
def get_user_type(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """
    If configured, get a user's type.

    Extra keyword arguments are forwarded to the GET request.

    Returns
    -------
    The decoded JSON response, or `None` if the request fails.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    url_parts = (
        STATIC_CONFIG['api']['endpoints']['users'],
        user.username,
        'type',
    )
    response = self.get('/'.join(url_parts), debug=debug, **kw)
    return response.json() if response else None

If configured, get a user's type.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw) -> int:
def get_user_attributes(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw
) -> 'Optional[Dict[str, Any]]':
    """
    Get a user's attributes.

    Extra keyword arguments are forwarded to the GET request.

    Returns
    -------
    The value decoded from the JSON response (presumably a dictionary of attributes),
    or `None` if the response body cannot be decoded.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/attributes"
    response = self.get(r_url, debug=debug, **kw)
    try:
        attributes = json.loads(response.text)
    except Exception:
        attributes = None
    return attributes

Get a user's attributes.

def register_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def register_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Register the provided token to the API.

    On success, the token's `label`, `secret`, `id` and (if returned) `expiration`
    are overwritten in place with the values from the response.

    Returns
    -------
    A `SuccessTuple` indicating whether the token was registered.
    """
    from meerschaum.utils.dtypes import json_serialize_value
    register_url = tokens_endpoint + '/register'
    request_body = json.dumps(
        {
            'label': token.label,
            'scopes': token.scopes,
            'expiration': token.expiration,
        },
        default=json_serialize_value,
    )
    response = self.post(register_url, data=request_body, debug=debug)
    if not response:
        return False, f"Failed to register token:\n{response.text}"

    registered = response.json()
    token.label = registered['label']
    token.secret = registered['secret']
    token.id = uuid.UUID(registered['id'])
    if registered.get('expiration', None):
        token.expiration = datetime.fromisoformat(registered['expiration'])

    return True, f"Registered token '{token.label}'."

Register the provided token to the API.

def get_token_model( self, token_id: uuid.UUID, debug: bool = False) -> 'Union[TokenModel, None]':
def get_token_model(self, token_id: uuid.UUID, debug: bool = False) -> 'Union[TokenModel, None]':
    """
    Return a token's model from the API instance.

    Returns
    -------
    A `TokenModel` built from the response's JSON fields,
    or `None` if the request fails.
    """
    from meerschaum.models import TokenModel
    response = self.get(tokens_endpoint + f'/{token_id}', debug=debug)
    if response:
        return TokenModel(**response.json())
    return None

Return a token's model from the API instance.

def get_tokens( self, labels: Optional[List[str]] = None, debug: bool = False) -> List[meerschaum.core.Token._Token.Token]:
def get_tokens(self, labels: Optional[List[str]] = None, debug: bool = False) -> List[Token]:
    """
    Return the tokens registered to the current user.

    Parameters
    ----------
    labels: Optional[List[str]], default None
        Sent as the `labels` query parameter of the request.

    debug: bool, default False
        Passed through to the underlying GET request.

    Returns
    -------
    A list of `Token` objects built from the response payloads,
    or an empty list (after a warning) if the request fails.
    """
    from meerschaum.utils.warnings import warn
    r_url = tokens_endpoint
    ### NOTE(review): `params` (with comma-joined labels) is built here but never sent;
    ### the request below passes the raw `labels` list instead.
    ### Confirm which form the API expects before removing or wiring up `params`.
    params = {}
    if labels:
        params['labels'] = ','.join(labels)
    response = self.get(r_url, params={'labels': labels}, debug=debug)
    if not response:
        warn(f"Could not get tokens from '{self}':\n{response.text}")
        return []

    ### Each payload's fields become keyword arguments to `Token`, bound to this connector.
    tokens = [
        Token(instance=self, **payload)
        for payload in response.json()
    ]
    return tokens

Return the tokens registered to the current user.

def edit_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
 82def edit_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
 83    """
 84    Persist the token's in-memory state to the API.
 85    """
 86    r_url = tokens_endpoint + f"/{token.id}/edit"
 87    response = self.post(
 88        r_url,
 89        json={
 90            'creation': token.creation.isoformat() if token.creation else None,
 91            'expiration': token.expiration.isoformat() if token.expiration else None,
 92            'label': token.label,
 93            'is_valid': token.is_valid,
 94            'scopes': token.scopes,
 95        },
 96    )
 97    if not response:
 98        return False, f"Failed to edit token:\n{response.text}"
 99
100    success, msg = response.json()
101    return success, msg

Persist the token's in-memory state to the API.

def invalidate_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def invalidate_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Invalidate the token, disabling it for future requests.

    Parameters
    ----------
    token: Token
        The token to invalidate (identified by `token.id`).

    debug: bool, default False
        Passed through to the underlying POST request.

    Returns
    -------
    The `SuccessTuple` decoded from the response, or a failure tuple if the request fails.
    """
    r_url = tokens_endpoint + f"/{token.id}/invalidate"
    ### Forward `debug` like the other token requests do.
    response = self.post(r_url, debug=debug)
    if not response:
        return False, f"Failed to invalidate token:\n{response.text}"

    success, msg = response.json()
    return success, msg

Invalidate the token, disabling it for future requests.

def get_token_scopes( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> List[str]:
def get_token_scopes(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> List[str]:
    """
    Return the scopes for a token.

    Parameters
    ----------
    token_id: Union[uuid.UUID, Token]
        The token's ID, or a `Token` whose `id` is used.

    Returns
    -------
    The `scopes` of the token's model,
    or an empty list if the model could not be fetched or has no scopes.
    """
    _token_id = (token_id.id if isinstance(token_id, Token) else token_id)
    ### Keep the model itself: it may be `None` when the request fails.
    model = self.get_token_model(_token_id, debug=debug)
    return getattr(model, 'scopes', None) or []

Return the scopes for a token.

def token_exists( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> bool:
def token_exists(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> bool:
    """
    Return `True` if a token exists.

    A token is considered to exist when its model can be fetched
    and that model has a non-null `creation` value.
    """
    lookup_id = token_id.id if isinstance(token_id, Token) else token_id
    model = self.get_token_model(lookup_id, debug=debug)
    return model is not None and model.creation is not None

Return True if a token exists.

def delete_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
def delete_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete the token from the API.

    Parameters
    ----------
    token: Token
        The token to delete. Only its `id` is read to build the request URL.

    debug: bool, default False
        Verbosity toggle, forwarded to the DELETE request.

    Returns
    -------
    A `SuccessTuple` of success (bool) and message (str).
    """
    response = self.delete(tokens_endpoint + f"/{token.id}", debug=debug)
    if response:
        ### The response body is expected to unpack into a (success, message) pair.
        success, msg = response.json()
        return success, msg

    return False, f"Failed to delete token:\n{response.text}"

Delete the token from the API.

@classmethod
def from_uri( cls, uri: str, label: Optional[str] = None, as_dict: bool = False) -> Union[APIConnector, Dict[str, Union[str, int]]]:
@classmethod
def from_uri(
    cls,
    uri: str,
    label: Optional[str] = None,
    as_dict: bool = False,
) -> Union[
        'meerschaum.connectors.APIConnector',
        Dict[str, Union[str, int]],
    ]:
    """
    Build an APIConnector (or its constructor arguments) from a URI string.

    Parameters
    ----------
    uri: str
        The URI connection string. It is parsed with `SQLConnector.parse_uri()`.

    label: Optional[str], default None
        If provided, use this as the connector label.
        Otherwise the label is the lowercased `username@host`
        (or just the host when the URI carries no username).

    as_dict: bool, default False
        If `True`, return the dictionary of keyword arguments
        instead of constructing a new `APIConnector`.

    Returns
    -------
    A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
    """
    from meerschaum.connectors.sql import SQLConnector
    attrs = SQLConnector.parse_uri(uri)
    if 'host' not in attrs:
        error("No host was found in the provided URI.")

    ### The parsed 'flavor' is stored under the 'protocol' key for the API connector.
    attrs['protocol'] = attrs.pop('flavor')

    if label:
        attrs['label'] = label
    else:
        user_prefix = (attrs['username'] + '@') if 'username' in attrs else ''
        attrs['label'] = (user_prefix + attrs['host']).lower()

    if as_dict:
        return attrs
    return cls(**attrs)

Create a new APIConnector from a URI string.

Parameters
  • uri (str): The URI connection string.
  • label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the lowercased username@host (or just the host if the URI has no username).
  • as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new APIConnector, otherwise create a new object.
Returns
  • A new APIConnector object or a dictionary of attributes (if as_dict is True).
def get_jobs(self, debug: bool = False) -> Dict[str, meerschaum.Job]:
def get_jobs(self, debug: bool = False) -> Dict[str, Job]:
    """
    Return a dictionary of remote jobs.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle, forwarded to the GET request.

    Returns
    -------
    A dictionary mapping job names to `Job` objects whose executor keys are
    this connector's string form. An empty dictionary (with a warning)
    is returned if the request fails.
    """
    response = self.get(JOBS_ENDPOINT, debug=debug)
    if not response:
        warn(f"Failed to get remote jobs from {self}.")
        return {}

    remote_jobs = {}
    for job_name, meta in response.json().items():
        remote_jobs[job_name] = Job(
            job_name,
            meta['sysargs'],
            executor_keys=str(self),
            _properties=meta['daemon']['properties']
        )
    return remote_jobs

Return a dictionary of remote jobs.

def get_job(self, name: str, debug: bool = False) -> meerschaum.Job:
def get_job(self, name: str, debug: bool = False) -> Job:
    """
    Return a single Job object.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_job_metadata()`.

    Returns
    -------
    A `Job` built from the remote metadata's `sysargs` and daemon properties.

    Raises
    ------
    ValueError
        If no metadata could be obtained for the job.
    """
    job_meta = self.get_job_metadata(name, debug=debug)
    if job_meta:
        return Job(
            name,
            job_meta['sysargs'],
            executor_keys=str(self),
            _properties=job_meta['daemon']['properties'],
        )

    raise ValueError(f"Job '{name}' does not exist.")

Return a single Job object.

def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
 63def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
 64    """
 65    Return the metadata for a single job.
 66    """
 67    now = time.perf_counter()
 68    _job_metadata_cache = self.__dict__.get('_job_metadata_cache', None)
 69    _job_metadata_timestamp = (
 70        _job_metadata_cache.get(name, {}).get('timestamp', None)
 71    ) if _job_metadata_cache is not None else None
 72
 73    if (
 74        _job_metadata_timestamp is not None
 75        and (now - _job_metadata_timestamp) < JOB_METADATA_CACHE_SECONDS
 76    ):
 77        if debug:
 78            dprint(f"Returning cached metadata for job '{name}'.")
 79        return _job_metadata_cache[name]['metadata']
 80
 81    response = self.get(JOBS_ENDPOINT + f"/{name}", debug=debug)
 82    if not response:
 83        if debug:
 84            msg = (
 85                response.json()['detail']
 86                if 'detail' in response.text
 87                else response.text
 88            )
 89            warn(f"Failed to get metadata for job '{name}':\n{msg}")
 90        return {}
 91
 92    metadata = response.json()
 93    if _job_metadata_cache is None:
 94        self._job_metadata_cache = {}
 95
 96    self._job_metadata_cache[name] = {
 97        'timestamp': now,
 98        'metadata': metadata,
 99    }
100    return metadata

Return the metadata for a single job.

def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the daemon properties for a single job.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_job_metadata()`.

    Returns
    -------
    The `properties` dictionary nested under the metadata's `daemon` key,
    or an empty dictionary if either key is absent.
    """
    job_meta = self.get_job_metadata(name, debug=debug)
    daemon_meta = job_meta.get('daemon', {})
    return daemon_meta.get('properties', {})

Return the daemon properties for a single job.

def get_job_exists(self, name: str, debug: bool = False) -> bool:
def get_job_exists(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a job exists.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the GET request.

    Returns
    -------
    The decoded JSON body of the `exists` endpoint,
    or `False` (with a warning) if the request fails.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/exists', debug=debug)
    if response:
        return response.json()

    warn(f"Failed to determine whether job '{name}' exists.")
    return False

Return whether a job exists.

def delete_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
def delete_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Delete a job.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the DELETE request.

    Returns
    -------
    A `SuccessTuple` of success (bool) and message (str).
    On failure, the message is the response's `detail` field when present,
    otherwise the raw response text.
    """
    response = self.delete(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if response:
        return tuple(response.json())

    failure_msg = (
        response.json()['detail']
        if 'detail' in response.text
        else response.text
    )
    return False, failure_msg

Delete a job.

def start_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
def start_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Start a job.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the POST request.

    Returns
    -------
    A `SuccessTuple` of success (bool) and message (str).
    On failure, the message is the response's `detail` field when present,
    otherwise the raw response text.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/start", debug=debug)
    if response:
        return tuple(response.json())

    failure_msg = (
        response.json()['detail']
        if 'detail' in response.text
        else response.text
    )
    return False, failure_msg

Start a job.

def create_job( self, name: str, sysargs: List[str], properties: Optional[Dict[str, str]] = None, debug: bool = False) -> Tuple[bool, str]:
def create_job(
    self,
    name: str,
    sysargs: List[str],
    properties: Optional[Dict[str, str]] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Create a job.

    Parameters
    ----------
    name: str
        The name of the job to create.

    sysargs: List[str]
        The job's arguments, sent under the `sysargs` key of the request body.

    properties: Optional[Dict[str, str]], default None
        Sent as-is under the `properties` key of the request body.

    debug: bool, default False
        Verbosity toggle, forwarded to the POST request.

    Returns
    -------
    A `SuccessTuple` of success (bool) and message (str).
    On failure, the message is the response's `detail` field when present,
    otherwise the raw response text.
    """
    payload = {
        'sysargs': sysargs,
        'properties': properties,
    }
    response = self.post(JOBS_ENDPOINT + f"/{name}", json=payload, debug=debug)
    if response:
        return tuple(response.json())

    failure_msg = (
        response.json()['detail']
        if 'detail' in response.text
        else response.text
    )
    return False, failure_msg

Create a job.

def stop_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
def stop_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Stop a job.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the POST request.

    Returns
    -------
    A `SuccessTuple` of success (bool) and message (str).
    On failure, the message is the response's `detail` field when present,
    otherwise the raw response text.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/stop", debug=debug)
    if response:
        return tuple(response.json())

    failure_msg = (
        response.json()['detail']
        if 'detail' in response.text
        else response.text
    )
    return False, failure_msg

Stop a job.

def pause_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
def pause_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Pause a job.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the POST request.

    Returns
    -------
    A `SuccessTuple` of success (bool) and message (str).
    On failure, the message is the response's `detail` field when present,
    otherwise the raw response text.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/pause", debug=debug)
    if response:
        return tuple(response.json())

    failure_msg = (
        response.json()['detail']
        if 'detail' in response.text
        else response.text
    )
    return False, failure_msg

Pause a job.

def get_logs(self, name: str, debug: bool = False) -> str:
def get_logs(self, name: str, debug: bool = False) -> str:
    """
    Return the logs for a job.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the GET request.

    Returns
    -------
    The decoded JSON body of the logs endpoint.

    Raises
    ------
    ValueError
        If the request fails.
    """
    ### Forward `debug` so this request honors the flag like the other job endpoints do.
    response = self.get(LOGS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        raise ValueError(f"Cannot fetch logs for job '{name}':\n{response.text}")

    return response.json()

Return the logs for a job.

def get_job_stop_time(self, name: str, debug: bool = False) -> Optional[datetime.datetime]:
def get_job_stop_time(self, name: str, debug: bool = False) -> Union[datetime, None]:
    """
    Return the job's manual stop time.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the GET request.

    Returns
    -------
    The stop time parsed from the ISO-format string returned by the API,
    or `None` if the request fails (a warning is emitted) or no stop time is set.
    """
    ### Forward `debug` so this request honors the flag like the other job endpoints do.
    response = self.get(JOBS_ENDPOINT + f"/{name}/stop_time", debug=debug)
    if not response:
        warn(f"Failed to get stop time for job '{name}':\n{response.text}")
        return None

    data = response.json()
    if data is None:
        return None

    return datetime.fromisoformat(data)

Return the job's manual stop time.

def monitor_logs( self, name: str, callback_function: Callable[[Any], Any], input_callback_function: Callable[[NoneType], str], stop_callback_function: Callable[[NoneType], str], stop_on_exit: bool = False, strip_timestamps: bool = False, accept_input: bool = True, debug: bool = False):
def monitor_logs(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[], str],
    stop_callback_function: Callable[[SuccessTuple], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and execute a callback with the changes.

    This is the blocking counterpart of `monitor_logs_async()`:
    it runs that coroutine to completion with `asyncio.run()`.

    Parameters
    ----------
    name: str
        The name of the remote job.

    callback_function: Callable[[Any], Any]
        Called with each log message received.

    input_callback_function: Callable[[], str]
        Called without arguments to obtain input to send to the job.

    stop_callback_function: Callable[[SuccessTuple], str]
        Called with the job's result when a stop message is received.

    stop_on_exit: bool, default False
        Forwarded to `monitor_logs_async()`.

    strip_timestamps: bool, default False
        Forwarded to `monitor_logs_async()`.

    accept_input: bool, default True
        Forwarded to `monitor_logs_async()`.

    debug: bool, default False
        Forwarded to `monitor_logs_async()`.

    Returns
    -------
    Whatever `monitor_logs_async()` returns.
    """
    ### NOTE: The callback annotations mirror `monitor_logs_async()`.
    return asyncio.run(
        self.monitor_logs_async(
            name,
            callback_function,
            input_callback_function=input_callback_function,
            stop_callback_function=stop_callback_function,
            stop_on_exit=stop_on_exit,
            strip_timestamps=strip_timestamps,
            accept_input=accept_input,
            debug=debug
        )
    )

Monitor a job's log files and execute a callback with the changes.

async def monitor_logs_async( self, name: str, callback_function: Callable[[Any], Any], input_callback_function: Callable[[], str], stop_callback_function: Callable[[Tuple[bool, str]], str], stop_on_exit: bool = False, strip_timestamps: bool = False, accept_input: bool = True, debug: bool = False):
async def monitor_logs_async(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[], str],
    stop_callback_function: Callable[[SuccessTuple], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and await a callback with the changes.

    Opens a websocket to the job's logs endpoint, sends the connector's token
    (or `'no-login'`), then dispatches every received message until monitoring stops.

    Parameters
    ----------
    name: str
        The name of the remote job.

    callback_function: Callable[[Any], Any]
        Called (or awaited, if a coroutine function) with each log message.

    input_callback_function: Callable[[], str]
        Called (or awaited) without arguments when the stdin message is received;
        its result is sent back over the websocket. May be `None` to ignore such messages.

    stop_callback_function: Callable[[SuccessTuple], str]
        Called (or awaited) with the job's result when the stop message is received.
        May be `None`.

    stop_on_exit: bool, default False
        If `True`, stop monitoring after a stop message has been handled.

    strip_timestamps: bool, default False
        If `True`, pass each log message through `strip_timestamp_from_line()`
        before handing it to `callback_function`.

    accept_input: bool, default True
        Not referenced in this function's body.

    debug: bool, default False
        Not referenced in this function's body.
    """
    import traceback
    from meerschaum.jobs import StopMonitoringLogs
    from meerschaum.utils.formatting._jobs import strip_timestamp_from_line

    websockets, websockets_exceptions = mrsm.attempt_import('websockets', 'websockets.exceptions')
    ws_scheme = 'ws' if self.URI.startswith('http://') else 'wss'
    ws_port = self.port if 'port' in self.__dict__ else ''
    ws_uri = f"{ws_scheme}://{self.host}:{ws_port}{LOGS_ENDPOINT}/{name}/ws"

    async def _invoke(func, *args):
        """Call `func` with `args`, awaiting it when it is a coroutine function."""
        if asyncio.iscoroutinefunction(func):
            return await func(*args)
        return func(*args)

    async def _send_stdin(client):
        """Fetch input from the caller and send it over the websocket."""
        if input_callback_function is None:
            return

        data = await _invoke(input_callback_function)
        await client.send(data)

    async def _handle_stop(client):
        """Read the job's result from the websocket and hand it to the stop callback."""
        try:
            result = tuple(json.loads(await client.recv()))
        except Exception as e:
            warn(traceback.format_exc())
            result = False, str(e)

        if stop_callback_function is not None:
            await _invoke(stop_callback_function, result)

        if stop_on_exit:
            raise StopMonitoringLogs

    ### Control messages are routed to these handlers; anything else is a log message.
    control_handlers = {
        JOBS_STDIN_MESSAGE: _send_stdin,
        JOBS_STOP_MESSAGE: _handle_stop,
    }

    async with websockets.connect(ws_uri) as websocket:
        try:
            await websocket.send(self.token or 'no-login')
        except websockets_exceptions.ConnectionClosedOK:
            pass

        while True:
            try:
                message = await websocket.recv()
                handler = control_handlers.get(message, None)
                if handler is not None:
                    await handler(websocket)
                    continue

                line = strip_timestamp_from_line(message) if strip_timestamps else message
                await _invoke(callback_function, line)
            except (KeyboardInterrupt, StopMonitoringLogs):
                await websocket.close()
                break

Monitor a job's log files and await a callback with the changes.

def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a remote job is blocking on stdin.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to the GET request.

    Returns
    -------
    The decoded JSON body of the `is_blocking_on_stdin` endpoint,
    or `False` if the request fails.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/is_blocking_on_stdin', debug=debug)
    return response.json() if response else False

Return whether a remote job is blocking on stdin.

def get_job_began(self, name: str, debug: bool = False) -> Optional[str]:
def get_job_began(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `began` timestamp, if it exists.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_job_properties()`.

    Returns
    -------
    The `began` value stored under the `daemon` key of the job's properties,
    or `None` if it is absent.
    """
    daemon_props = self.get_job_properties(name, debug=debug).get('daemon', {})
    return daemon_props.get('began', None)

Return a job's began timestamp, if it exists.

def get_job_ended(self, name: str, debug: bool = False) -> Optional[str]:
def get_job_ended(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `ended` timestamp, if it exists.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_job_properties()`.

    Returns
    -------
    The `ended` value stored under the `daemon` key of the job's properties,
    or `None` if it is absent.
    """
    daemon_props = self.get_job_properties(name, debug=debug).get('daemon', {})
    return daemon_props.get('ended', None)

Return a job's ended timestamp, if it exists.

def get_job_paused(self, name: str, debug: bool = False) -> Optional[str]:
def get_job_paused(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `paused` timestamp, if it exists.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_job_properties()`.

    Returns
    -------
    The `paused` value stored under the `daemon` key of the job's properties,
    or `None` if it is absent.
    """
    daemon_props = self.get_job_properties(name, debug=debug).get('daemon', {})
    return daemon_props.get('paused', None)

Return a job's paused timestamp, if it exists.

def get_job_status(self, name: str, debug: bool = False) -> str:
def get_job_status(self, name: str, debug: bool = False) -> str:
    """
    Return the job's status.

    Parameters
    ----------
    name: str
        The name of the remote job.

    debug: bool, default False
        Verbosity toggle, forwarded to `get_job_metadata()`.

    Returns
    -------
    The `status` value from the job's metadata,
    falling back to `'stopped'` when the metadata has no such key.
    """
    job_meta = self.get_job_metadata(name, debug=debug)
    return job_meta.get('status', 'stopped')

Return the job's status.

def get_connector( type: str = None, label: str = None, refresh: bool = False, debug: bool = False, _load_plugins: bool = True, **kw: Any) -> Connector:
 68def get_connector(
 69    type: str = None,
 70    label: str = None,
 71    refresh: bool = False,
 72    debug: bool = False,
 73    _load_plugins: bool = True,
 74    **kw: Any
 75) -> Connector:
 76    """
 77    Return existing connector or create new connection and store for reuse.
 78    
 79    You can create new connectors if enough parameters are provided for the given type and flavor.
 80
 81    Parameters
 82    ----------
 83    type: Optional[str], default None
 84        Connector type (sql, api, etc.).
 85        Defaults to the type of the configured `instance_connector`.
 86
 87    label: Optional[str], default None
 88        Connector label (e.g. main). Defaults to `'main'`.
 89
 90    refresh: bool, default False
 91        Refresh the Connector instance / construct new object. Defaults to `False`.
 92
 93    kw: Any
 94        Other arguments to pass to the Connector constructor.
 95        If the Connector has already been constructed and new arguments are provided,
 96        `refresh` is set to `True` and the old Connector is replaced.
 97
 98    Returns
 99    -------
100    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
101    `meerschaum.connectors.sql.SQLConnector`).
102    
103    Examples
104    --------
105    The following parameters would create a new
106    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.
107
108    ```
109    >>> conn = get_connector(
110    ...     type = 'sql',
111    ...     label = 'newlabel',
112    ...     flavor = 'sqlite',
113    ...     database = '/file/path/to/database.db'
114    ... )
115    >>>
116    ```
117
118    """
119    from meerschaum.connectors.parse import parse_instance_keys
120    from meerschaum.config import get_config
121    from meerschaum._internal.static import STATIC_CONFIG
122    from meerschaum.utils.warnings import warn
123    global _loaded_plugin_connectors
124    if isinstance(type, str) and not label and ':' in type:
125        type, label = type.split(':', maxsplit=1)
126
127    if _load_plugins:
128        with _locks['_loaded_plugin_connectors']:
129            if not _loaded_plugin_connectors:
130                load_plugin_connectors()
131                _load_builtin_custom_connectors()
132                _loaded_plugin_connectors = True
133
134    if type is None and label is None:
135        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
136        ### recursive call to get_connector
137        return parse_instance_keys(default_instance_keys)
138
139    ### NOTE: the default instance connector may not be main.
140    ### Only fall back to 'main' if the type is provided by the label is omitted.
141    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']
142
143    ### type might actually be a label. Check if so and raise a warning.
144    if type not in connectors:
145        possibilities, poss_msg = [], ""
146        for _type in get_config('meerschaum', 'connectors'):
147            if type in get_config('meerschaum', 'connectors', _type):
148                possibilities.append(f"{_type}:{type}")
149        if len(possibilities) > 0:
150            poss_msg = " Did you mean"
151            for poss in possibilities[:-1]:
152                poss_msg += f" '{poss}',"
153            if poss_msg.endswith(','):
154                poss_msg = poss_msg[:-1]
155            if len(possibilities) > 1:
156                poss_msg += " or"
157            poss_msg += f" '{possibilities[-1]}'?"
158
159        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
160        return None
161
162    if 'sql' not in types:
163        from meerschaum.connectors.plugin import PluginConnector
164        from meerschaum.connectors.valkey import ValkeyConnector
165        with _locks['types']:
166            types.update({
167                'api': APIConnector,
168                'sql': SQLConnector,
169                'plugin': PluginConnector,
170                'valkey': ValkeyConnector,
171            })
172
173    ### determine if we need to call the constructor
174    if not refresh:
175        ### see if any user-supplied arguments differ from the existing instance
176        if label in connectors[type]:
177            warning_message = None
178            for attribute, value in kw.items():
179                if attribute not in connectors[type][label].meta:
180                    import inspect
181                    cls = connectors[type][label].__class__
182                    cls_init_signature = inspect.signature(cls)
183                    cls_init_params = cls_init_signature.parameters
184                    if attribute not in cls_init_params:
185                        warning_message = (
186                            f"Received new attribute '{attribute}' not present in connector " +
187                            f"{connectors[type][label]}.\n"
188                        )
189                elif connectors[type][label].__dict__[attribute] != value:
190                    warning_message = (
191                        f"Mismatched values for attribute '{attribute}' in connector "
192                        + f"'{connectors[type][label]}'.\n" +
193                        f"  - Keyword value: '{value}'\n" +
194                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
195                    )
196            if warning_message is not None:
197                warning_message += (
198                    "\nSetting `refresh` to True and recreating connector with type:"
199                    + f" '{type}' and label '{label}'."
200                )
201                refresh = True
202                warn(warning_message)
203        else: ### connector doesn't yet exist
204            refresh = True
205
206    ### only create an object if refresh is True
207    ### (can be manually specified, otherwise determined above)
208    if refresh:
209        with _locks['connectors']:
210            try:
211                ### will raise an error if configuration is incorrect / missing
212                conn = types[type](label=label, **kw)
213                connectors[type][label] = conn
214            except InvalidAttributesError as ie:
215                warn(
216                    f"Incorrect attributes for connector '{type}:{label}'.\n"
217                    + str(ie),
218                    stack = False,
219                )
220                conn = None
221            except Exception as e:
222                from meerschaum.utils.formatting import get_console
223                console = get_console()
224                if console:
225                    console.print_exception()
226                warn(
227                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
228                    stack = False,
229                )
230                conn = None
231        if conn is None:
232            return None
233
234    return connectors[type][label]

Return existing connector or create new connection and store for reuse.

You can create new connectors if enough parameters are provided for the given type and flavor.

Parameters
  • type (Optional[str], default None): Connector type (sql, api, etc.). Defaults to the type of the configured instance_connector.
  • label (Optional[str], default None): Connector label (e.g. main). Defaults to 'main'.
  • refresh (bool, default False): Refresh the Connector instance / construct new object. Defaults to False.
  • kw (Any): Other arguments to pass to the Connector constructor. If the Connector has already been constructed and new arguments are provided, refresh is set to True and the old Connector is replaced.
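Returns
  • A new Meerschaum connector (e.g. meerschaum.connectors.api.APIConnector, meerschaum.connectors.sql.SQLConnector).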
Examples

The following parameters would create a new meerschaum.connectors.sql.SQLConnector that isn't in the configuration file.

>>> conn = get_connector(
...     type = 'sql',
...     label = 'newlabel',
...     flavor = 'sqlite',
...     database = '/file/path/to/database.db'
... )
>>>
def is_connected(keys: str, **kw) -> bool:
def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, return `False` immediately.
    If the connector exists but its connection test fails or raises, return `False`.

    **NOTE:** Only works with the types listed in `instance_types`.
    Keyword arguments are passed to the connector's `test_connection()` method.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    ### Keys must split into exactly a type and a label.
    try:
        typ, label = keys.split(':')
    except Exception:
        return False

    already_created = typ in instance_types and label in connectors.get(typ, {})
    if not already_created:
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        ### Silence warnings raised while probing the connection.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False

Check if the connector keys correspond to an active connection. If the connector has not been created, it will immediately return False. If the connector exists but cannot communicate with the source, return False.

NOTE: Only works with instance connectors (SQLConnectors and APIConnectors). Keyword arguments are passed to meerschaum.connectors.poll.retry_connect.

Parameters
  • keys: The keys to the connector (e.g. 'sql:main').
Returns
  • A bool corresponding to whether a successful connection may be made.