meerschaum.connectors

Create connectors with meerschaum.connectors.get_connector(). For ease of use, you can also import from the root meerschaum module:

>>> from meerschaum import get_connector
>>> conn = get_connector()
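Connectors are cached by type and label, and 'type:label' strings are split automatically. A hedged sketch, assuming the default `sql:main` connector is configured:

>>> conn = get_connector('sql', 'main')
>>> conn is get_connector('sql:main')
True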
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

"""
Create connectors with `meerschaum.connectors.get_connector()`.
For ease of use, you can also import from the root `meerschaum` module:
```
>>> from meerschaum import get_connector
>>> conn = get_connector()
```
"""

from __future__ import annotations

import meerschaum as mrsm
from meerschaum.utils.typing import Any, Union, Optional, List, Dict
from meerschaum.utils.threading import RLock
from meerschaum.utils.warnings import warn

from meerschaum.connectors._Connector import Connector, InvalidAttributesError
from meerschaum.connectors.sql._SQLConnector import SQLConnector
from meerschaum.connectors.api._APIConnector import APIConnector
from meerschaum.connectors.sql._create_engine import flavor_configs as sql_flavor_configs

__all__ = (
    "make_connector",
    "Connector",
    "SQLConnector",
    "APIConnector",
    "get_connector",
    "is_connected",
    "poll",
    "api",
    "sql",
    "valkey",
)

### Store connectors partitioned by type and label for reuse.
connectors: Dict[str, Dict[str, Connector]] = {
    'api'    : {},
    'sql'    : {},
    'plugin' : {},
    'valkey' : {},
}
instance_types: List[str] = ['sql', 'api']
_locks: Dict[str, RLock] = {
    'connectors'               : RLock(),
    'types'                    : RLock(),
    'custom_types'             : RLock(),
    '_loaded_plugin_connectors': RLock(),
    'instance_types'           : RLock(),
}
attributes: Dict[str, Dict[str, Any]] = {
    'api': {
        'required': [
            'host',
            'username',
            'password',
        ],
        'optional': [
            'port',
        ],
        'default': {
            'protocol': 'http',
        },
    },
    'sql': {
        'flavors': sql_flavor_configs,
    },
}
### Fill this with objects only when connectors are first referenced.
types: Dict[str, Any] = {}
custom_types: set = set()
_loaded_plugin_connectors: bool = False


def get_connector(
    type: Optional[str] = None,
    label: Optional[str] = None,
    refresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Connector:
    """
    Return an existing connector, or create a new one and store it for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.

    Parameters
    ----------
    type: Optional[str], default None
        Connector type (`sql`, `api`, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. `main`). Defaults to `'main'`.

    refresh: bool, default False
        If `True`, construct a new object and replace any cached instance.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```
    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    with _locks['_loaded_plugin_connectors']:
        if not _loaded_plugin_connectors:
            load_plugin_connectors()
            _load_builtin_custom_connectors()
            _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector " +
                            f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n" +
                        f"  - Keyword value: '{value}'\n" +
                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else: ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack = False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack = False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]


def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    try:
        typ, label = keys.split(':')
    except Exception:
        return False
    if typ not in instance_types:
        return False
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False


def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`.

    Parameters
    ----------
    _is_executor: bool, default False
        If `True`, strip the suffix `executor` (rather than `connector`)
        from the lower-cased class name when deriving the type.

    To register the type as an instance connector, set the class attribute
    `IS_INSTANCE` to `True`. This requires implementing the various pipes
    functions and lots of testing.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>>
    """
    import re
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls


def load_plugin_connectors():
    """
    If a plugin makes use of the `make_connector` decorator,
    load its module.
    """
    from meerschaum.plugins import get_plugins, import_plugins
    to_import = []
    for plugin in get_plugins():
        if plugin is None:
            continue
        with open(plugin.__file__, encoding='utf-8') as f:
            text = f.read()
        if 'make_connector' in text or 'Connector' in text:
            to_import.append(plugin.name)
    if not to_import:
        return
    import_plugins(*to_import)


def get_connector_plugin(
    connector: Connector,
) -> Union[str, None, mrsm.Plugin]:
    """
    Determine the plugin for a connector.
    This is useful for handling virtual environments for custom instance connectors.

    Parameters
    ----------
    connector: Connector
        The connector which may require a virtual environment.

    Returns
    -------
    A Plugin, 'mrsm', or None.
    """
    if not hasattr(connector, 'type'):
        return None
    plugin_name = (
        connector.__module__.replace('plugins.', '').split('.')[0]
        if connector.type in custom_types else (
            connector.label
            if connector.type == 'plugin'
            else 'mrsm'
        )
    )
    plugin = mrsm.Plugin(plugin_name)
    return plugin if plugin.is_installed() else None


def _load_builtin_custom_connectors():
    """
    Import custom connectors decorated with `@make_connector` or `@make_executor`.
    """
    import meerschaum.jobs.systemd
    import meerschaum.connectors.valkey
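A usage sketch for `is_connected` (hedged: it returns `False` unless the connector has already been constructed, and a live test connection determines the result):

>>> from meerschaum.connectors import get_connector, is_connected
>>> conn = get_connector('sql:main')
>>> is_connected('sql:main')  # True only if a test connection succeeds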
def make_connector(cls, _is_executor: bool = False):

Register a class as a Connector. The type will be the lower case of the class name, without the suffix connector.

Parameters
  • _is_executor (bool, default False): If True, strip the suffix executor (rather than connector) from the lower-cased class name when deriving the type.

To register the type as an instance connector, set the class attribute IS_INSTANCE to True. This requires implementing the various pipes functions and lots of testing.

Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>> 
>>> @make_connector
... class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
... 
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
>>>
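The type derivation itself is just a suffix strip on the lower-cased class name, as in this minimal sketch:

>>> import re
>>> re.sub(r'connector$', '', 'FooConnector'.lower())
'foo'
>>> re.sub(r'executor$', '', 'FooExecutor'.lower())  # with _is_executor=True
'foo'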
class Connector:
class Connector(metaclass=abc.ABCMeta):
    """
    The base connector class to hold connection attributes.
    """
    def __init__(
        self,
        type: Optional[str] = None,
        label: Optional[str] = None,
        **kw: Any
    ):
        """
        Set the given keyword arguments as attributes.

        Parameters
        ----------
        type: str
            The `type` of the connector (e.g. `sql`, `api`, `plugin`).

        label: str
            The `label` for the connector.

        Examples
        --------
        Run `mrsm edit config` to edit connectors in the YAML file:

        ```yaml
        meerschaum:
            connectors:
                {type}:
                    {label}:
                        ### attributes go here
        ```

        """
        self._original_dict = copy.deepcopy(self.__dict__)
        self._set_attributes(type=type, label=label, **kw)

        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
        self.verify_attributes(
            ['uri']
            if 'uri' in self.__dict__
            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
        )

    def _reset_attributes(self):
        self.__dict__ = self._original_dict

    def _set_attributes(
        self,
        *args,
        inherit_default: bool = True,
        **kw: Any
    ):
        from meerschaum.config.static import STATIC_CONFIG
        from meerschaum.utils.warnings import error

        self._attributes = {}

        default_label = STATIC_CONFIG['connectors']['default_label']

        ### NOTE: Support the legacy method of explicitly passing the type.
        label = kw.get('label', None)
        if label is None:
            if len(args) == 2:
                label = args[1]
            elif len(args) == 0:
                label = None
            else:
                label = args[0]

        if label == 'default':
            error(
                f"Label cannot be 'default'. Did you mean '{default_label}'?",
                InvalidAttributesError,
            )
        self.__dict__['label'] = label

        from meerschaum.config import get_config
        conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors'))
        connector_config = copy.deepcopy(get_config('system', 'connectors'))

        ### inherit attributes from 'default' if it exists
        if inherit_default:
            inherit_from = 'default'
            if self.type in conn_configs and inherit_from in conn_configs[self.type]:
                _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from])
                self._attributes.update(_inherit_dict)

        ### load user config into self._attributes
        if self.type in conn_configs and self.label in conn_configs[self.type]:
            self._attributes.update(conn_configs[self.type][self.label] or {})

        ### load system config into self._sys_config
        ### (deep copy so future Connectors don't inherit changes)
        if self.type in connector_config:
            self._sys_config = copy.deepcopy(connector_config[self.type])

        ### add additional arguments or override configuration
        self._attributes.update(kw)

        ### finally, update __dict__ with _attributes.
        self.__dict__.update(self._attributes)

    def verify_attributes(
        self,
        required_attributes: Optional[List[str]] = None,
        debug: bool = False,
    ) -> None:
        """
        Ensure that the required attributes are present.

        The Connector base class checks the minimum requirements.
        Child classes may enforce additional requirements.

        Parameters
        ----------
        required_attributes: Optional[List[str]], default None
            Attributes to be verified. If `None`, default to `['label']`.

        debug: bool, default False
            Verbosity toggle.

        Returns
        -------
        Nothing.

        Raises
        ------
        An `InvalidAttributesError` if any of the required attributes are missing.
        """
        from meerschaum.utils.warnings import error, warn
        from meerschaum.utils.debug import dprint
        from meerschaum.utils.misc import items_str
        if required_attributes is None:
            required_attributes = ['label']

        missing_attributes = set()
        for a in required_attributes:
            if a not in self.__dict__:
                missing_attributes.add(a)
        if len(missing_attributes) > 0:
            error(
                (
                    f"Missing {items_str(list(missing_attributes))} "
                    + f"for connector '{self.type}:{self.label}'."
                ),
                InvalidAttributesError,
                silent=True,
                stack=False
            )


    def __str__(self):
        """
        When cast to a string, return type:label.
        """
        return f"{self.type}:{self.label}"

    def __repr__(self):
        """
        Represent the connector as type:label.
        """
        return str(self)

    @property
    def meta(self) -> Dict[str, Any]:
        """
        Return the keys needed to reconstruct this Connector.
        """
        _meta = {
            key: value
            for key, value in self.__dict__.items()
            if not str(key).startswith('_')
        }
        _meta.update({
            'type': self.type,
            'label': self.label,
        })
        return _meta


    @property
    def type(self) -> str:
        """
        Return the type for this connector.
        """
        _type = self.__dict__.get('type', None)
        if _type is None:
            import re
            is_executor = self.__class__.__name__.lower().endswith('executor')
            suffix_regex = (
                r'connector$'
                if not is_executor
                else r'executor$'
            )
            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
            self.__dict__['type'] = _type
        return _type


    @property
    def label(self) -> str:
        """
        Return the label for this connector.
        """
        _label = self.__dict__.get('label', None)
        if _label is None:
            from meerschaum.config.static import STATIC_CONFIG
            _label = STATIC_CONFIG['connectors']['default_label']
            self.__dict__['label'] = _label
        return _label
The base connector class to hold connection attributes.

Connector(type: Optional[str] = None, label: Optional[str] = None, **kw: Any)
Set the given keyword arguments as attributes.

Parameters
  • type (str): The type of the connector (e.g. sql, api, plugin).
  • label (str): The label for the connector.
Examples

Run mrsm edit config to edit connectors in the YAML file:

meerschaum:
    connectors:
        {type}:
            {label}:
                ### attributes go here
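A minimal sketch of the attribute behavior, using a hypothetical `DemoConnector` subclass (not part of Meerschaum) to show that constructor keyword arguments become instance attributes:

>>> from meerschaum.connectors import Connector, make_connector
>>> 
>>> @make_connector
... class DemoConnector(Connector):
...     REQUIRED_ATTRIBUTES = ['host']
... 
>>> conn = DemoConnector('demo', 'temp', host='localhost')
>>> conn.host
'localhost'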
def verify_attributes(self, required_attributes: Optional[List[str]] = None, debug: bool = False) -> None:

Ensure that the required attributes are present.

The Connector base class checks the minimum requirements. Child classes may enforce additional requirements.

Parameters
  • required_attributes (Optional[List[str]], default None): Attributes to be verified. If None, default to ['label'].
  • debug (bool, default False): Verbosity toggle.
Returns
  • Nothing.
Raises
  • An InvalidAttributesError if any of the required attributes are missing.
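A hedged sketch of the failure path, reusing the hypothetical `DemoConnector` above and assuming `meerschaum.utils.warnings.error` raises the given exception class:

>>> from meerschaum.connectors import InvalidAttributesError
>>> try:
...     conn.verify_attributes(['username', 'password'])
... except InvalidAttributesError:
...     print('missing required attributes')
missing required attributes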
meta: Dict[str, Any]

Return the keys needed to reconstruct this Connector.

type: str

Return the type for this connector.

label: str

Return the label for this connector.
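Together, `type`, `label`, and `meta` let a connector be reduced to its keys and rebuilt later; a quick sketch, assuming `sql:main` is configured:

>>> conn = get_connector('sql:main')
>>> str(conn)
'sql:main'
>>> conn.meta['label']
'main'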

class SQLConnector(meerschaum.connectors.Connector):
class SQLConnector(Connector):
    """
    Connect to SQL databases via `sqlalchemy`.

    SQLConnectors may be used as Meerschaum instance connectors.
    Read more about connectors and instances at
    https://meerschaum.io/reference/connectors/

    """

    IS_INSTANCE: bool = True

    from ._create_engine import flavor_configs, create_engine
    from ._sql import (
        read,
        value,
        exec,
        execute,
        to_sql,
        exec_queries,
        get_connection,
        _cleanup_connections,
    )
    from meerschaum.utils.sql import test_connection
    from ._fetch import fetch, get_pipe_metadef
    from ._cli import cli, _cli_exit
    from ._pipes import (
        fetch_pipes_keys,
        create_indices,
        drop_indices,
        get_create_index_queries,
        get_drop_index_queries,
        get_add_columns_queries,
        get_alter_columns_queries,
        delete_pipe,
        get_pipe_data,
        get_pipe_data_query,
        register_pipe,
        edit_pipe,
        get_pipe_id,
        get_pipe_attributes,
        sync_pipe,
        sync_pipe_inplace,
        get_sync_time,
        pipe_exists,
        get_pipe_rowcount,
        drop_pipe,
        clear_pipe,
        deduplicate_pipe,
        get_pipe_table,
        get_pipe_columns_types,
        get_to_sql_dtype,
        get_pipe_schema,
        create_pipe_table_from_df,
        get_pipe_columns_indices,
    )
    from ._plugins import (
        register_plugin,
        delete_plugin,
        get_plugin_id,
        get_plugin_version,
        get_plugins,
        get_plugin_user_id,
        get_plugin_username,
        get_plugin_attributes,
    )
    from ._users import (
        register_user,
        get_user_id,
        get_users,
        edit_user,
        delete_user,
        get_user_password_hash,
        get_user_type,
        get_user_attributes,
    )
    from ._uri import from_uri, parse_uri
    from ._instance import (
        _log_temporary_tables_creation,
        _drop_temporary_table,
        _drop_temporary_tables,
        _drop_old_temporary_tables,
    )

    def __init__(
        self,
        label: Optional[str] = None,
        flavor: Optional[str] = None,
        wait: bool = False,
        connect: bool = False,
        debug: bool = False,
        **kw: Any
    ):
        """
        Parameters
        ----------
        label: str, default 'main'
            The identifying label for the connector.
            E.g. for `sql:main`, 'main' is the label.

        flavor: Optional[str], default None
            The database flavor, e.g.
            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
            To see supported flavors, run the `bootstrap connectors` command.

        wait: bool, default False
            If `True`, block until a database connection has been made.

        connect: bool, default False
            If `True`, immediately attempt to connect to the database
            and raise a warning if the connection fails.

        debug: bool, default False
            Verbosity toggle.

        kw: Any
            All other arguments will be set as the connector's attributes.
            Therefore, a connector may be made without being registered,
            as long as enough parameters are supplied to the constructor.
        """
        if 'uri' in kw:
            uri = kw['uri']
            if uri.startswith('postgres') and not uri.startswith('postgresql'):
                uri = uri.replace('postgres', 'postgresql', 1)
            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
                uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
            if uri.startswith('timescaledb://'):
                uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1)
                flavor = 'timescaledb'
            kw['uri'] = uri
            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
            label = label or from_uri_params.get('label', None)
            _ = from_uri_params.pop('label', None)

            ### Sometimes the flavor may be provided with a URI.
            kw.update(from_uri_params)
            if flavor:
                kw['flavor'] = flavor

        ### set __dict__ in base class
        super().__init__(
            'sql',
            label = label or self.__dict__.get('label', None),
            **kw
        )

        if self.__dict__.get('flavor', None) == 'sqlite':
            self._reset_attributes()
            self._set_attributes(
                'sql',
                label = label,
                inherit_default = False,
                **kw
            )
            ### For backwards compatibility reasons, set the path for sql:local if it's missing.
            if self.label == 'local' and not self.__dict__.get('database', None):
                from meerschaum.config._paths import SQLITE_DB_PATH
                self.database = str(SQLITE_DB_PATH)

        ### ensure flavor and label are set accordingly
        if 'flavor' not in self.__dict__:
            if flavor is None and 'uri' not in self.__dict__:
                raise Exception(
                    f"    Missing flavor. Provide flavor as a key for '{self}'."
                )
            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)

        if self.flavor == 'postgres':
            self.flavor = 'postgresql'

        self._debug = debug
        ### Store the PID and thread at initialization
        ### so we can dispose of the Pool in child processes or threads.
        import os, threading
        self._pid = os.getpid()
        self._thread_ident = threading.current_thread().ident
        self._sessions = {}
        self._locks = {'_sessions': threading.RLock(), }

        ### verify the flavor's requirements are met
        if self.flavor not in self.flavor_configs:
            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
        if not self.__dict__.get('uri'):
            self.verify_attributes(
                self.flavor_configs[self.flavor].get('requirements', set()),
                debug=debug,
            )

        if wait:
            from meerschaum.connectors.poll import retry_connect
            retry_connect(connector=self, debug=debug)

        if connect:
            if not self.test_connection(debug=debug):
                warn(f"Failed to connect with connector '{self}'!", stack=False)

    @property
    def Session(self):
        if '_Session' not in self.__dict__:
            if self.engine is None:
                return None

            from meerschaum.utils.packages import attempt_import
            sqlalchemy_orm = attempt_import('sqlalchemy.orm')
            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
            self._Session = sqlalchemy_orm.scoped_session(session_factory)

        return self._Session

    @property
    def engine(self):
        """
        Return the SQLAlchemy engine connected to the configured database.
        """
        import os
        import threading
        if '_engine' not in self.__dict__:
            self._engine, self._engine_str = self.create_engine(include_uri=True)

        same_process = os.getpid() == self._pid
        same_thread = threading.current_thread().ident == self._thread_ident

        ### handle child processes
        if not same_process:
            self._pid = os.getpid()
            self._thread = threading.current_thread()
            warn("Different PID detected. Disposing of connections...")
            self._engine.dispose()

        ### handle different threads
        if not same_thread:
            if self.flavor == 'duckdb':
                warn("Different thread detected.")
                self._engine.dispose()

        return self._engine

    @property
    def DATABASE_URL(self) -> str:
        """
        Return the URI connection string (alias for `SQLConnector.URI`).
        """
        _ = self.engine
        return str(self._engine_str)

    @property
    def URI(self) -> str:
        """
        Return the URI connection string.
        """
        _ = self.engine
        return str(self._engine_str)

    @property
    def IS_THREAD_SAFE(self) -> bool:
        """
        Return whether this connector may be multithreaded.
        """
        if self.flavor in ('duckdb', 'oracle'):
            return False
        if self.flavor == 'sqlite':
            return ':memory:' not in self.URI
        return True


    @property
    def metadata(self):
        """
        Return the metadata bound to this configured schema.
        """
        from meerschaum.utils.packages import attempt_import
        sqlalchemy = attempt_import('sqlalchemy')
        if '_metadata' not in self.__dict__:
            self._metadata = sqlalchemy.MetaData(schema=self.schema)
        return self._metadata


    @property
    def instance_schema(self):
        """
        Return the schema name for Meerschaum tables.
        """
        return self.schema


    @property
    def internal_schema(self):
        """
        Return the schema name for internal tables.
        """
        from meerschaum.config.static import STATIC_CONFIG
        from meerschaum.utils.packages import attempt_import
        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
        schema_name = self.__dict__.get('internal_schema', None) or (
            STATIC_CONFIG['sql']['internal_schema']
            if self.flavor not in NO_SCHEMA_FLAVORS
            else self.schema
        )

        if '_internal_schema' not in self.__dict__:
            self._internal_schema = schema_name
        return self._internal_schema


    @property
    def db(self) -> Optional[databases.Database]:
        from meerschaum.utils.packages import attempt_import
        databases = attempt_import('databases', lazy=False, install=True)
        url = self.DATABASE_URL
        if 'mysql' in url:
            url = url.replace('+pymysql', '')
        if '_db' not in self.__dict__:
            try:
                self._db = databases.Database(url)
            except KeyError:
                ### Likely encountered an unsupported flavor.
                from meerschaum.utils.warnings import warn
                self._db = None
        return self._db


    @property
    def db_version(self) -> Union[str, None]:
        """
        Return the database version.
        """
        _db_version = self.__dict__.get('_db_version', None)
        if _db_version is not None:
            return _db_version

        from meerschaum.utils.sql import get_db_version
        self._db_version = get_db_version(self)
        return self._db_version


    @property
    def schema(self) -> Union[str, None]:
        """
        Return the default schema to use.
        A value of `None` will not prepend a schema.
        """
        if 'schema' in self.__dict__:
            return self.__dict__['schema']

        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
        if self.flavor in NO_SCHEMA_FLAVORS:
            self.__dict__['schema'] = None
            return None

        sqlalchemy = mrsm.attempt_import('sqlalchemy')
        _schema = sqlalchemy.inspect(self.engine).default_schema_name
        self.__dict__['schema'] = _schema
        return _schema


    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

    def __call__(self):
        return self
Connect to SQL databases via sqlalchemy.

SQLConnectors may be used as Meerschaum instance connectors. Read more about connectors and instances at https://meerschaum.io/reference/connectors/
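A hedged construction sketch (SQLite only requires `database`, so no credentials are needed):

>>> from meerschaum.connectors.sql import SQLConnector
>>> conn = SQLConnector('temp', flavor='sqlite', database='/tmp/demo.db')
>>> conn.flavor
'sqlite'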

SQLConnector(label: Optional[str] = None, flavor: Optional[str] = None, wait: bool = False, connect: bool = False, debug: bool = False, **kw: Any)
Parameters
  • label (str, default 'main'): The identifying label for the connector. E.g. for sql:main, 'main' is the label.
  • flavor (Optional[str], default None): The database flavor, e.g. 'sqlite', 'postgresql', 'cockroachdb', etc. To see supported flavors, run the bootstrap connectors command.
  • wait (bool, default False): If True, block until a database connection has been made.
  • connect (bool, default False): If True, immediately attempt to connect to the database and raise a warning if the connection fails.
  • debug (bool, default False): Verbosity toggle.
  • kw (Any): All other arguments will be set as the connector's attributes. Therefore, a connector may be made without being registered, as long as enough parameters are supplied to the constructor.
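The constructor also normalizes URI schemes before building the engine; a sketch with hypothetical credentials, assuming `from_uri` derives the flavor from the scheme:

>>> conn = SQLConnector(uri='postgres://user:pass@localhost:5432/db')
>>> conn.flavor  # the 'postgres' scheme is upgraded to 'postgresql'
'postgresql'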
IS_INSTANCE: bool = True
Session
engine

Return the SQLAlchemy engine connected to the configured database.
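The engine is a regular SQLAlchemy Engine, so the standard connection API applies; a hedged sketch:

>>> import sqlalchemy
>>> with conn.engine.connect() as connection:
...     print(connection.execute(sqlalchemy.text('SELECT 1')).scalar())
1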

DATABASE_URL: str

Return the URI connection string (alias for SQLConnector.URI).

URI: str

Return the URI connection string.

IS_THREAD_SAFE: bool

Return whether this connector may be multithreaded.
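A sketch of gating concurrency on this flag (hedged: it assumes `conn.value()` executes a query and returns a scalar, which matches its use as the single-value reader imported from `._sql`):

>>> from concurrent.futures import ThreadPoolExecutor
>>> max_workers = 4 if conn.IS_THREAD_SAFE else 1
>>> with ThreadPoolExecutor(max_workers=max_workers) as pool:
...     results = list(pool.map(conn.value, ['SELECT 1', 'SELECT 2']))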

metadata

Return the metadata bound to this configured schema.

instance_schema

Return the schema name for Meerschaum tables.

internal_schema

Return the schema name for internal tables.

db: 'Optional[databases.Database]'
db_version: Optional[str]

Return the database version.

schema: Optional[str]

Return the default schema to use. A value of None will not prepend a schema.

flavor_configs = {
    'timescaledb': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 5432}},
    'postgresql': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 5432}},
    'citus': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 5432}},
    'mssql': {'engine': 'mssql+pyodbc', 'create_engine': {'fast_executemany': True, 'isolation_level': 'AUTOCOMMIT', 'use_setinputsizes': False, 'pool_pre_ping': True, 'ignore_no_transaction_on_rollback': True}, 'omit_create_engine': {'method'}, 'to_sql': {'method': None}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 1433, 'options': 'driver=ODBC Driver 18 for SQL Server&UseFMTONLY=Yes&TrustServerCertificate=yes&Encrypt=no&MARS_Connection=yes'}},
    'mysql': {'engine': 'mysql+pymysql', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 3306}},
    'mariadb': {'engine': 'mysql+pymysql', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 3306}},
    'oracle': {'engine': 'oracle+cx_oracle', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': None}, 'requirements': {'host', 'database', 'username', 'password'}, 'defaults': {'port': 1521}},
    'sqlite': {'engine': 'sqlite', 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'database'}, 'defaults': {}},
    'duckdb': {'engine': 'duckdb', 'create_engine': {}, 'omit_create_engine': {'ALL'}, 'to_sql': {'method': 'multi'}, 'requirements': '', 'defaults': {}},
    'cockroachdb': {'engine': 'cockroachdb', 'omit_create_engine': {'method'}, 'create_engine': {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 3600, 'connect_args': {}}, 'to_sql': {'method': 'multi'}, 'requirements': {'host'}, 'defaults': {'port': 26257, 'database': 'defaultdb', 'username': 'root', 'password': 'admin'}},
}
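These per-flavor settings drive `create_engine` below; the `defaults` fill in missing attributes such as port numbers. A quick lookup sketch:

>>> flavor_configs['postgresql']['engine']
'postgresql+psycopg'
>>> flavor_configs['postgresql']['defaults']['port']
5432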
def create_engine(self, include_uri: bool = False, debug: bool = False, **kw) -> 'sqlalchemy.engine.Engine':
180def create_engine(
181    self,
182    include_uri: bool = False,
183    debug: bool = False,
184    **kw
185) -> 'sqlalchemy.engine.Engine':
186    """Create a sqlalchemy engine by building the engine string."""
187    from meerschaum.utils.packages import attempt_import
188    from meerschaum.utils.warnings import error, warn
189    sqlalchemy = attempt_import('sqlalchemy')
190    import urllib
191    import copy
192    ### Install and patch required drivers.
193    if self.flavor in install_flavor_drivers:
194        attempt_import(*install_flavor_drivers[self.flavor], debug=debug, lazy=False, warn=False)
195        if self.flavor == 'mssql':
196            pyodbc = attempt_import('pyodbc', debug=debug, lazy=False, warn=False)
197            pyodbc.pooling = False
198    if self.flavor in require_patching_flavors:
199        from meerschaum.utils.packages import determine_version, _monkey_patch_get_distribution
200        import pathlib
201        for install_name, import_name in require_patching_flavors[self.flavor]:
202            pkg = attempt_import(
203                import_name,
204                debug=debug,
205                lazy=False,
206                warn=False
207            )
208            _monkey_patch_get_distribution(
209                install_name, determine_version(pathlib.Path(pkg.__file__), venv='mrsm')
210            )
211
212    ### Verify that everything is in order.
213    if self.flavor not in flavor_configs:
214        error(f"Cannot create a connector with the flavor '{self.flavor}'.")
215
216    ### supplement missing values with defaults (e.g. port number)
217    for a, value in flavor_configs[self.flavor]['defaults'].items():
218        if a not in self.__dict__:
219            self.__dict__[a] = value
220
221    _engine = flavor_configs[self.flavor].get('engine', None)
222    _username = self.__dict__.get('username', None)
223    _password = self.__dict__.get('password', None)
224    _host = self.__dict__.get('host', None)
225    _port = self.__dict__.get('port', None)
226    _database = self.__dict__.get('database', None)
227    _options = self.__dict__.get('options', {})
228    if isinstance(_options, str):
229        _options = dict(urllib.parse.parse_qsl(_options))
230    _uri = self.__dict__.get('uri', None)
231
232    ### Handle registering specific dialects (due to installing in virtual environments).
233    if self.flavor in flavor_dialects:
234        sqlalchemy.dialects.registry.register(*flavor_dialects[self.flavor])
235
236    ### self._sys_config was deepcopied and can be updated safely
237    if self.flavor in ("sqlite", "duckdb"):
238        engine_str = f"{_engine}:///{_database}" if not _uri else _uri
239        if 'create_engine' not in self._sys_config:
240            self._sys_config['create_engine'] = {}
241        if 'connect_args' not in self._sys_config['create_engine']:
242            self._sys_config['create_engine']['connect_args'] = {}
243        self._sys_config['create_engine']['connect_args'].update({"check_same_thread" : False})
244    else:
245        engine_str = (
246            _engine + "://" + (_username if _username is not None else '') +
247            ((":" + urllib.parse.quote_plus(_password)) if _password is not None else '') +
248            "@" + _host + ((":" + str(_port)) if _port is not None else '') +
249            (("/" + _database) if _database is not None else '')
250            + (("?" + urllib.parse.urlencode(_options)) if _options else '')
251        ) if not _uri else _uri
252
253        ### Sometimes the timescaledb:// flavor can slip in.
254        if _uri and self.flavor in ('timescaledb',) and self.flavor in _uri:
255            engine_str = engine_str.replace(f'{self.flavor}', 'postgresql', 1)
256
257    if debug:
258        dprint(
259            (
260                (engine_str.replace(':' + _password, ':' + ('*' * len(_password))))
261                    if _password is not None else engine_str
262            ) + '\n' + f"{self._sys_config.get('create_engine', {}).get('connect_args', {})}"
263        )
264
265    _kw_copy = copy.deepcopy(kw)
266
267    ### NOTE: Order of inheritance:
268    ###       1. Defaults
269    ###       2. System configuration
270    ###       3. Connector configuration
271    ###       4. Keyword arguments
272    _create_engine_args = flavor_configs.get(self.flavor, {}).get('create_engine', {})
273    def _apply_create_engine_args(update):
274        if 'ALL' not in flavor_configs[self.flavor].get('omit_create_engine', {}):
275            _create_engine_args.update(
276                { k: v for k, v in update.items()
277                    if 'omit_create_engine' not in flavor_configs[self.flavor]
278                        or k not in flavor_configs[self.flavor].get('omit_create_engine')
279                }
280            )
281    _apply_create_engine_args(self._sys_config.get('create_engine', {}))
282    _apply_create_engine_args(self.__dict__.get('create_engine', {}))
283    _apply_create_engine_args(_kw_copy)
284
285    try:
286        engine = sqlalchemy.create_engine(
287            engine_str,
288            ### I know this looks confusing, and maybe it's bad code,
289            ### but it's simple. It dynamically parses the config string
290            ### and splits it to separate the class name (QueuePool)
291            ### from the module name (sqlalchemy.pool).
292            poolclass    = getattr(
293                attempt_import(
294                    ".".join(self._sys_config['poolclass'].split('.')[:-1])
295                ),
296                self._sys_config['poolclass'].split('.')[-1]
297            ),
298            echo         = debug,
299            **_create_engine_args
300        )
301    except Exception as e:
302        warn(f"Failed to create connector '{self}':\n{traceback.format_exc()}", stack=False)
303        engine = None
304
305    if include_uri:
306        return engine, engine_str
307    return engine

Create a sqlalchemy engine by building the engine string.
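To make the URL assembly above concrete, here is a standalone sketch of the same string construction for a PostgreSQL-flavored connector (the attribute values are hypothetical, and the real method additionally merges create_engine arguments and masks the password when printing):

import urllib.parse

username, password = 'mrsm', 'p@ss word'
host, port, database = 'localhost', 5432, 'meerschaum'

### Quote the password so special characters survive the URL.
engine_str = (
    'postgresql+psycopg://' + username
    + ':' + urllib.parse.quote_plus(password)
    + '@' + host + ':' + str(port)
    + '/' + database
)
print(engine_str)
# postgresql+psycopg://mrsm:p%40ss+word@localhost:5432/meerschaum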

def read( self, query_or_table: 'Union[str, sqlalchemy.Query]', params: Union[Dict[str, Any], List[str], NoneType] = None, dtype: Optional[Dict[str, Any]] = None, coerce_float: bool = True, chunksize: Optional[int] = -1, workers: Optional[int] = None, chunk_hook: Optional[Callable[[pandas.core.frame.DataFrame], Any]] = None, as_hook_results: bool = False, chunks: Optional[int] = None, schema: Optional[str] = None, as_chunks: bool = False, as_iterator: bool = False, as_dask: bool = False, index_col: Optional[str] = None, silent: bool = False, debug: bool = False, **kw: Any) -> 'Union[pandas.DataFrame, dask.DataFrame, List[pandas.DataFrame], List[Any], None]':
 26def read(
 27    self,
 28    query_or_table: Union[str, sqlalchemy.Query],
 29    params: Union[Dict[str, Any], List[str], None] = None,
 30    dtype: Optional[Dict[str, Any]] = None,
 31    coerce_float: bool = True,
 32    chunksize: Optional[int] = -1,
 33    workers: Optional[int] = None,
 34    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None,
 35    as_hook_results: bool = False,
 36    chunks: Optional[int] = None,
 37    schema: Optional[str] = None,
 38    as_chunks: bool = False,
 39    as_iterator: bool = False,
 40    as_dask: bool = False,
 41    index_col: Optional[str] = None,
 42    silent: bool = False,
 43    debug: bool = False,
 44    **kw: Any
 45) -> Union[
 46    pandas.DataFrame,
 47    dask.DataFrame,
 48    List[pandas.DataFrame],
 49    List[Any],
 50    None,
 51]:
 52    """
 53    Read a SQL query or table into a pandas dataframe.
 54
 55    Parameters
 56    ----------
 57    query_or_table: Union[str, sqlalchemy.Query]
 58        The SQL query (sqlalchemy Query or string) or name of the table from which to select.
 59
 60    params: Optional[Dict[str, Any]], default None
 61        `List` or `Dict` of parameters to pass to `pandas.read_sql()`.
 62        See the pandas documentation for more information:
 63        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
 64
 65    dtype: Optional[Dict[str, Any]], default None
 66        A dictionary of data types to pass to `pandas.read_sql()`.
 67        See the pandas documentation for more information:
 68        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
 69
 70    chunksize: Optional[int], default -1
 71        How many rows to read per chunk. `None` will read everything in one large chunk.
 72        Defaults to system configuration.
 73
 74        **NOTE:** DuckDB does not allow for chunking.
 75
 76    workers: Optional[int], default None
 77        How many threads to use when consuming the generator.
 78        Only applies if `chunk_hook` is provided.
 79
 80    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]], default None
 81        Hook function to execute once per chunk, e.g. writing and reading chunks intermittently.
 82        See `--sync-chunks` for an example.
 83        **NOTE:** `as_iterator` MUST be False (default).
 84
 85    as_hook_results: bool, default False
 86        If `True`, return a `List` of the outputs of the hook function.
 87        Only applicable if `chunk_hook` is not None.
 88
 89        **NOTE:** `as_iterator` MUST be `False` (default).
 90
 91    chunks: Optional[int], default None
 92        Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and
 93        return into a single dataframe.
 94        For example, to limit the returned dataframe to 100,000 rows,
 95        you could specify a `chunksize` of `1000` and `chunks` of `100`.
 96
 97    schema: Optional[str], default None
 98        If just a table name is provided, optionally specify the table schema.
 99        Defaults to `SQLConnector.schema`.
100
101    as_chunks: bool, default False
102        If `True`, return a list of DataFrames.
103        Otherwise return a single DataFrame.
104
105    as_iterator: bool, default False
106        If `True`, return the pandas DataFrame iterator.
107        `chunksize` must not be `None` (falls back to 1000 if so),
108        and hooks are not called in this case.
109
110    index_col: Optional[str], default None
111        If using Dask, use this column as the index column.
112        If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
113
114    silent: bool, default False
115        If `True`, don't raise warnings in case of errors.
116        Defaults to `False`.
117
118    Returns
119    -------
120    A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators,
121    or `None` if something breaks.
122
123    """
124    if chunks is not None and chunks <= 0:
125        return []
126    from meerschaum.utils.sql import sql_item_name, truncate_item_name
127    from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
128    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
129    from meerschaum.utils.packages import attempt_import, import_pandas
130    from meerschaum.utils.pool import get_pool
131    from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
132    import warnings
133    import traceback
134    from decimal import Decimal
135    pd = import_pandas()
136    is_dask = 'dask' in pd.__name__
137    dd = attempt_import('dask.dataframe') if is_dask else None
138    pandas = attempt_import('pandas')
139    is_dask = dd is not None
140    npartitions = chunksize_to_npartitions(chunksize)
141    if is_dask:
142        chunksize = None
143    schema = schema or self.schema
144    utc_dt_cols = [
145        col
146        for col, typ in dtype.items()
147        if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
148    ] if dtype else []
149
150    if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
151        dtype = dtype.copy()
152        for col in utc_dt_cols:
153            dtype[col] = 'datetime64[ns]'
154
155    pool = get_pool(workers=workers)
156    sqlalchemy = attempt_import("sqlalchemy")
157    default_chunksize = self._sys_config.get('chunksize', None)
158    chunksize = chunksize if chunksize != -1 else default_chunksize
159    if chunksize is None and as_iterator:
160        if not silent and self.flavor not in _disallow_chunks_flavors:
161            warn(
162                "An iterator may only be generated if chunksize is not None.\n"
163                + "Falling back to a chunksize of 1000.", stacklevel=3,
164            )
165        chunksize = 1000
166    if chunksize is not None and self.flavor in _max_chunks_flavors:
167        if chunksize > _max_chunks_flavors[self.flavor]:
168            if chunksize != default_chunksize:
169                warn(
170                    f"The specified chunksize of {chunksize} exceeds the maximum of "
171                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
172                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
173                    stacklevel=3,
174                )
175            chunksize = _max_chunks_flavors[self.flavor]
176
177    if chunksize is not None and self.flavor in _disallow_chunks_flavors:
178        chunksize = None
179
180    if debug:
181        import time
182        start = time.perf_counter()
183        dprint(f"[{self}]\n{query_or_table}")
184        dprint(f"[{self}] Fetching with chunksize: {chunksize}")
185
186    ### This might be sqlalchemy object or the string of a table name.
187    ### We check for spaces and quotes to see if it might be a weird table.
188    if (
189        ' ' not in str(query_or_table)
190        or (
191            ' ' in str(query_or_table)
192            and str(query_or_table).startswith('"')
193            and str(query_or_table).endswith('"')
194        )
195    ):
196        truncated_table_name = truncate_item_name(str(query_or_table), self.flavor)
197        if truncated_table_name != str(query_or_table) and not silent:
198            warn(
199                f"Table '{query_or_table}' is too long for '{self.flavor}',"
200                + f" will instead read the table '{truncated_table_name}'."
201            )
202
203        query_or_table = sql_item_name(str(query_or_table), self.flavor, schema)
204        if debug:
205            dprint(f"[{self}] Reading from table {query_or_table}")
206        formatted_query = sqlalchemy.text("SELECT * FROM " + str(query_or_table))
207        str_query = f"SELECT * FROM {query_or_table}"
208    else:
209        str_query = query_or_table
210
211    formatted_query = (
212        sqlalchemy.text(str_query)
213        if not is_dask and isinstance(str_query, str)
214        else format_sql_query_for_dask(str_query)
215    )
216
217    chunk_list = []
218    chunk_hook_results = []
219    def _process_chunk(_chunk, _retry_on_failure: bool = True):
220        if self.flavor in TIMEZONE_NAIVE_FLAVORS:
221            for col in utc_dt_cols:
222                _chunk[col] = coerce_timezone(_chunk[col], strip_utc=False)
223        if not as_hook_results:
224            chunk_list.append(_chunk)
225        if chunk_hook is None:
226            return None
227
228        result = None
229        try:
230            result = chunk_hook(
231                _chunk,
232                workers=workers,
233                chunksize=chunksize,
234                debug=debug,
235                **kw
236            )
237        except Exception:
238            result = False, traceback.format_exc()
239            from meerschaum.utils.formatting import get_console
240            if not silent:
241                get_console().print_exception()
242
243        ### If the chunk fails to process, try it again one more time.
244        if isinstance(result, tuple) and result[0] is False:
245            if _retry_on_failure:
246                return _process_chunk(_chunk, _retry_on_failure=False)
247
248        return result
249
250    try:
251        stream_results = not as_iterator and chunk_hook is not None and chunksize is not None
252        with warnings.catch_warnings():
253            warnings.filterwarnings('ignore', 'case sensitivity issues')
254
255            read_sql_query_kwargs = {
256                'params': params,
257                'dtype': dtype,
258                'coerce_float': coerce_float,
259                'index_col': index_col,
260            }
261            if is_dask:
262                if index_col is None:
263                    dd = None
264                    pd = attempt_import('pandas')
265                    read_sql_query_kwargs.update({
266                        'chunksize': chunksize,
267                    })
268            else:
269                read_sql_query_kwargs.update({
270                    'chunksize': chunksize,
271                })
272
273            if is_dask and dd is not None:
274                ddf = dd.read_sql_query(
275                    formatted_query,
276                    self.URI,
277                    **read_sql_query_kwargs
278                )
279            else:
280
281                def get_chunk_generator(connectable):
282                    chunk_generator = pd.read_sql_query(
283                        formatted_query,
284                        connectable,
285                        **read_sql_query_kwargs
286                    )
287                    to_return = (
288                        chunk_generator
289                        if as_iterator or chunksize is None
290                        else (
291                            list(pool.imap(_process_chunk, chunk_generator))
292                            if as_hook_results
293                            else None
294                        )
295                    )
296                    return chunk_generator, to_return
297
298                if self.flavor in SKIP_READ_TRANSACTION_FLAVORS:
299                    chunk_generator, to_return = get_chunk_generator(self.engine)
300                else:
301                    with self.engine.begin() as transaction:
302                        with transaction.execution_options(stream_results=stream_results) as connection:
303                            chunk_generator, to_return = get_chunk_generator(connection)
304
305                if to_return is not None:
306                    return to_return
307
308    except Exception as e:
309        if debug:
310            dprint(f"[{self}] Failed to execute query:\n\n{query_or_table}\n\n")
311        if not silent:
312            warn(str(e), stacklevel=3)
313        from meerschaum.utils.formatting import get_console
314        if not silent:
315            get_console().print_exception()
316
317        return None
318
319    if is_dask and dd is not None:
320        ddf = ddf.reset_index()
321        return ddf
322
323    chunk_list = []
324    read_chunks = 0
325    chunk_hook_results = []
326    if chunksize is None:
327        chunk_list.append(chunk_generator)
328    elif as_iterator:
329        return chunk_generator
330    else:
331        try:
332            for chunk in chunk_generator:
333                if chunk_hook is not None:
334                    chunk_hook_results.append(
335                        chunk_hook(chunk, chunksize=chunksize, debug=debug, **kw)
336                    )
337                chunk_list.append(chunk)
338                read_chunks += 1
339                if chunks is not None and read_chunks >= chunks:
340                    break
341        except Exception as e:
342            warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3)
343            from meerschaum.utils.formatting import get_console
344            if not silent:
345                get_console().print_exception()
346
347            return None
348
366    ### If no chunks returned, read without chunks
367    ### to get columns
368    if len(chunk_list) == 0:
369        with warnings.catch_warnings():
370            warnings.filterwarnings('ignore', 'case sensitivity issues')
371            _ = read_sql_query_kwargs.pop('chunksize', None)
372            with self.engine.begin() as connection:
373                chunk_list.append(
374                    pd.read_sql_query(
375                        formatted_query,
376                        connection,
377                        **read_sql_query_kwargs
378                    )
379                )
380
381    ### call the hook on any missed chunks.
382    if chunk_hook is not None and len(chunk_list) > len(chunk_hook_results):
383        for c in chunk_list[len(chunk_hook_results):]:
384            chunk_hook_results.append(
385                chunk_hook(c, chunksize=chunksize, debug=debug, **kw)
386            )
387
388    ### chunksize is not None so must iterate
389    if debug:
390        end = time.perf_counter()
391        dprint(f"Fetched {len(chunk_list)} chunks in {round(end - start, 2)} seconds.")
392
393    if as_hook_results:
394        return chunk_hook_results
395    
396    ### Skip `pd.concat()` if `as_chunks` is specified.
397    if as_chunks:
398        for c in chunk_list:
399            c.reset_index(drop=True, inplace=True)
400            for col in get_numeric_cols(c):
401                c[col] = c[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)
402        return chunk_list
403
404    df = pd.concat(chunk_list).reset_index(drop=True)
405    ### NOTE: The calls to `canonical()` are to drop leading and trailing zeroes.
406    for col in get_numeric_cols(df):
407        df[col] = df[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)
408
409    return df

Read a SQL query or table into a pandas dataframe.

Parameters
  • query_or_table (Union[str, sqlalchemy.Query]): The SQL query (sqlalchemy Query or string) or name of the table from which to select.
  • params (Optional[Dict[str, Any]], default None): List or Dict of parameters to pass to pandas.read_sql(). See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
  • dtype (Optional[Dict[str, Any]], default None): A dictionary of data types to pass to pandas.read_sql(). See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
  • chunksize (Optional[int], default -1): How many rows to read per chunk. None will read everything in one large chunk. Defaults to system configuration.

    NOTE: DuckDB does not allow for chunking.

  • workers (Optional[int], default None): How many threads to use when consuming the generator. Only applies if chunk_hook is provided.
  • chunk_hook (Optional[Callable[[pandas.DataFrame], Any]], default None): Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. See --sync-chunks for an example. NOTE: as_iterator MUST be False (default).
  • as_hook_results (bool, default False): If True, return a List of the outputs of the hook function. Only applicable if chunk_hook is not None.

    NOTE: as_iterator MUST be False (default).

  • chunks (Optional[int], default None): Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and return into a single dataframe. For example, to limit the returned dataframe to 100,000 rows, you could specify a chunksize of 1000 and chunks of 100.
  • schema (Optional[str], default None): If just a table name is provided, optionally specify the table schema. Defaults to SQLConnector.schema.
  • as_chunks (bool, default False): If True, return a list of DataFrames. Otherwise return a single DataFrame.
  • as_iterator (bool, default False): If True, return the pandas DataFrame iterator. chunksize must not be None (falls back to 1000 if so), and hooks are not called in this case.
  • index_col (Optional[str], default None): If using Dask, use this column as the index column. If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
  • silent (bool, default False): If True, don't raise warnings in case of errors. Defaults to False.
Returns
  • A pd.DataFrame (default case), or an iterator, or a list of dataframes / iterators, or None if something breaks.
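A few common read() invocations (a sketch; the connector label and table name are hypothetical):

import meerschaum as mrsm
conn = mrsm.get_connector('sql', 'main')

df = conn.read('my_table')                             # SELECT * FROM "my_table"
df = conn.read('SELECT COUNT(*) AS n FROM my_table')   # arbitrary query

### Stream chunks through a hook instead of concatenating one DataFrame.
row_counts = conn.read(
    'my_table',
    chunksize=1_000,
    chunk_hook=lambda chunk, **kw: len(chunk),
    as_hook_results=True,
)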
def value(self, query: str, *args: Any, use_pandas: bool = False, **kw: Any) -> Any:
412def value(
413    self,
414    query: str,
415    *args: Any,
416    use_pandas: bool = False,
417    **kw: Any
418) -> Any:
419    """
420    Execute the provided query and return the first value.
421
422    Parameters
423    ----------
424    query: str
425        The SQL query to execute.
426        
427    *args: Any
428        The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec`
429        if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.
430        
431    use_pandas: bool, default False
432        If `True`, use `meerschaum.connectors.SQLConnector.read`, otherwise use
433        `meerschaum.connectors.sql.SQLConnector.exec` (default).
434        **NOTE:** This is always `True` for DuckDB.
435
436    **kw: Any
437        See `args`.
438
439    Returns
440    -------
441    Any value returned from the query.
442
443    """
444    from meerschaum.utils.packages import attempt_import
445    sqlalchemy = attempt_import('sqlalchemy')
446    if self.flavor == 'duckdb':
447        use_pandas = True
448    if use_pandas:
449        try:
450            return self.read(query, *args, **kw).iloc[0, 0]
451        except Exception:
452            return None
453
454    _close = kw.get('close', True)
455    _commit = kw.get('commit', (self.flavor != 'mssql'))
456
457    #  _close = True
458    #  _commit = True
459
460    try:
461        result, connection = self.exec(
462            query,
463            *args,
464            with_connection=True,
465            close=False,
466            commit=_commit,
467            **kw
468        )
469        first = result.first() if result is not None else None
470        _val = first[0] if first is not None else None
471    except Exception as e:
472        warn(e, stacklevel=3)
473        return None
474    if _close:
475        try:
476            connection.close()
477        except Exception as e:
478            warn("Failed to close connection with exception:\n" + str(e))
479    return _val

Execute the provided query and return the first value.

Parameters
  • query (str): The SQL query to execute.
  • *args (Any): The arguments passed to meerschaum.connectors.sql.SQLConnector.exec if use_pandas is False (default) or to meerschaum.connectors.sql.SQLConnector.read.
  • use_pandas (bool, default False): If True, use meerschaum.connectors.SQLConnector.read, otherwise use meerschaum.connectors.sql.SQLConnector.exec (default). NOTE: This is always True for DuckDB.
  • **kw (Any): See args.
Returns
  • Any value returned from the query.
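For instance (the table name is hypothetical):

>>> conn.value('SELECT COUNT(*) FROM my_table')
42

value() is a convenience wrapper: it returns the first column of the first row and yields None on failure rather than raising.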
def exec( self, query: str, *args: Any, silent: bool = False, debug: bool = False, commit: Optional[bool] = None, close: Optional[bool] = None, with_connection: bool = False, _connection=None, _transaction=None, **kw: Any) -> 'Union[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.cursor.LegacyCursorResult, Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection], Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection], None]':
493def exec(
494    self,
495    query: str,
496    *args: Any,
497    silent: bool = False,
498    debug: bool = False,
499    commit: Optional[bool] = None,
500    close: Optional[bool] = None,
501    with_connection: bool = False,
502    _connection=None,
503    _transaction=None,
504    **kw: Any
505) -> Union[
506        sqlalchemy.engine.result.resultProxy,
507        sqlalchemy.engine.cursor.LegacyCursorResult,
508        Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection],
509        Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection],
510        None
511]:
512    """
513    Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
514
515    If inserting data, please use bind variables to avoid SQL injection!
516
517    Parameters
518    ----------
519    query: Union[str, List[str], Tuple[str]]
520        The query to execute.
521        If `query` is a list or tuple, call `self.exec_queries()` instead.
522
523    args: Any
524        Arguments passed to `sqlalchemy.engine.execute`.
525
526    silent: bool, default False
527        If `True`, suppress warnings.
528
529    commit: Optional[bool], default None
530        If `True`, commit the changes after execution.
531        Causes issues with flavors like `'mssql'`.
532        This does not apply if `query` is a list of strings.
533
534    close: Optional[bool], default None
535        If `True`, close the connection after execution.
536        Causes issues with flavors like `'mssql'`.
537        This does not apply if `query` is a list of strings.
538
539    with_connection: bool, default False
540        If `True`, return a tuple including the connection object.
541        This does not apply if `query` is a list of strings.
542
543    Returns
544    -------
545    The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided.
546
547    """
548    if isinstance(query, (list, tuple)):
549        return self.exec_queries(
550            list(query),
551            *args,
552            silent=silent,
553            debug=debug,
554            **kw
555        )
556
557    from meerschaum.utils.packages import attempt_import
558    sqlalchemy = attempt_import("sqlalchemy")
559    if debug:
560        dprint(f"[{self}] Executing query:\n{query}")
561
562    _close = close if close is not None else (self.flavor != 'mssql')
563    _commit = commit if commit is not None else (
564        (self.flavor != 'mssql' or 'select' not in str(query).lower())
565    )
566
567    ### Select and Insert objects need to be compiled (SQLAlchemy 2.0.0+).
568    if not hasattr(query, 'compile'):
569        query = sqlalchemy.text(query)
570
571    connection = _connection if _connection is not None else self.get_connection()
572
573    try:
574        transaction = (
575            _transaction
576            if _transaction is not None else (
577                connection.begin()
578                if _commit
579                else None
580            )
581        )
582    except sqlalchemy.exc.InvalidRequestError as e:
583        if _connection is not None or _transaction is not None:
584            raise e
585        connection = self.get_connection(rebuild=True)
586        transaction = connection.begin()
587
588    if transaction is not None and not transaction.is_active and _transaction is not None:
589        connection = self.get_connection(rebuild=True)
590        transaction = connection.begin() if _commit else None
591
592    result = None
593    try:
594        result = connection.execute(query, *args, **kw)
595        if _commit:
596            transaction.commit()
597    except Exception as e:
598        if debug:
599            dprint(f"[{self}] Failed to execute query:\n\n{query}\n\n{e}")
600        if not silent:
601            warn(str(e), stacklevel=3)
602        result = None
603        if _commit:
604            transaction.rollback()
605            connection = self.get_connection(rebuild=True)
606    finally:
607        if _close:
608            connection.close()
609
610    if with_connection:
611        return result, connection
612
613    return result

Execute SQL code and return the sqlalchemy result, e.g. when calling stored procedures.

If inserting data, please use bind variables to avoid SQL injection!

Parameters
  • query (Union[str, List[str], Tuple[str]]): The query to execute. If query is a list or tuple, call self.exec_queries() instead.
  • args (Any): Arguments passed to sqlalchemy.engine.execute.
  • silent (bool, default False): If True, suppress warnings.
  • commit (Optional[bool], default None): If True, commit the changes after execution. Causes issues with flavors like 'mssql'. This does not apply if query is a list of strings.
  • close (Optional[bool], default None): If True, close the connection after execution. Causes issues with flavors like 'mssql'. This does not apply if query is a list of strings.
  • with_connection (bool, default False): If True, return a tuple including the connection object. This does not apply if query is a list of strings.
Returns
  • The sqlalchemy result object, or a tuple with the connection if with_connection is provided.
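As the docstring warns, inserts should use bind variables rather than string interpolation. A minimal sketch with SQLAlchemy named parameters (the table and values are hypothetical):

import sqlalchemy
query = sqlalchemy.text("INSERT INTO my_table (id, val) VALUES (:id, :val)")
result = conn.exec(query, {'id': 1, 'val': 'hello'})

A plain string works as well; exec() wraps anything without a compile attribute in sqlalchemy.text() before executing it.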
def execute( self, *args: Any, **kw: Any) -> 'Optional[sqlalchemy.engine.result.resultProxy]':
482def execute(
483    self,
484    *args : Any,
485    **kw : Any
486) -> Optional[sqlalchemy.engine.result.resultProxy]:
487    """
488    An alias for `meerschaum.connectors.sql.SQLConnector.exec`.
489    """
490    return self.exec(*args, **kw)
def to_sql( self, df: pandas.core.frame.DataFrame, name: str = None, index: bool = False, if_exists: str = 'replace', method: str = '', chunksize: Optional[int] = -1, schema: Optional[str] = None, silent: bool = False, debug: bool = False, as_tuple: bool = False, as_dict: bool = False, _connection=None, _transaction=None, **kw) -> Union[bool, Tuple[bool, str]]:
710def to_sql(
711    self,
712    df: pandas.DataFrame,
713    name: str = None,
714    index: bool = False,
715    if_exists: str = 'replace',
716    method: str = "",
717    chunksize: Optional[int] = -1,
718    schema: Optional[str] = None,
719    silent: bool = False,
720    debug: bool = False,
721    as_tuple: bool = False,
722    as_dict: bool = False,
723    _connection=None,
724    _transaction=None,
725    **kw
726) -> Union[bool, SuccessTuple]:
727    """
728    Upload a DataFrame's contents to the SQL server.
729
730    Parameters
731    ----------
732    df: pd.DataFrame
733        The DataFrame to be uploaded.
734
735    name: str
736        The name of the table to be created.
737
738    index: bool, default False
739        If True, creates the DataFrame's indices as columns.
740
741    if_exists: str, default 'replace'
742        Drop and create the table ('replace') or append if it exists
743        ('append') or raise Exception ('fail').
744        Options are ['replace', 'append', 'fail'].
745
746    method: str, default ''
747        Either None or 'multi'. See the pandas to_sql documentation for details.
748
749    chunksize: Optional[int], default -1
750        How many rows to insert at a time.
751
752    schema: Optional[str], default None
753        Optionally override the schema for the table.
754        Defaults to `SQLConnector.schema`.
755
756    as_tuple: bool, default False
757        If `True`, return a (success_bool, message) tuple instead of a `bool`.
758        Defaults to `False`.
759
760    as_dict: bool, default False
761        If `True`, return a dictionary of transaction information.
762        The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`,
763        `method`, and `target`.
764
765    kw: Any
766        Additional arguments will be passed to the DataFrame's `to_sql` function
767
768    Returns
769    -------
770    Either a `bool` or a `SuccessTuple` (depends on `as_tuple`).
771    """
772    import time
773    import json
774    import decimal
775    from decimal import Decimal, Context
776    from meerschaum.utils.warnings import error, warn
777    import warnings
778    import functools
779    if name is None:
780        error(f"Name must not be `None` to insert data into {self}.")
781
782    ### We're requiring `name` to be positional, and sometimes it's passed in from background jobs.
783    kw.pop('name', None)
784
785    schema = schema or self.schema
786
787    from meerschaum.utils.sql import (
788        sql_item_name,
789        table_exists,
790        json_flavors,
791        truncate_item_name,
792        DROP_IF_EXISTS_FLAVORS,
793    )
794    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
795    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
796    from meerschaum.utils.dtypes.sql import (
797        NUMERIC_PRECISION_FLAVORS,
798        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
799        get_db_type_from_pd_type,
800    )
801    from meerschaum.connectors.sql._create_engine import flavor_configs
802    from meerschaum.utils.packages import attempt_import, import_pandas
803    sqlalchemy = attempt_import('sqlalchemy', debug=debug)
804    pd = import_pandas()
805    is_dask = 'dask' in df.__module__
806
807    stats = {'target': name, }
808    ### resort to defaults if None
809    if method == "":
810        if self.flavor in _bulk_flavors:
811            method = functools.partial(psql_insert_copy, schema=self.schema)
812        else:
813            ### Should resolve to 'multi' or `None`.
814            method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
815    stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
816
817    default_chunksize = self._sys_config.get('chunksize', None)
818    chunksize = chunksize if chunksize != -1 else default_chunksize
819    if chunksize is not None and self.flavor in _max_chunks_flavors:
820        if chunksize > _max_chunks_flavors[self.flavor]:
821            if chunksize != default_chunksize:
822                warn(
823                    f"The specified chunksize of {chunksize} exceeds the maximum of "
824                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
825                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
826                    stacklevel = 3,
827                )
828            chunksize = _max_chunks_flavors[self.flavor]
829    stats['chunksize'] = chunksize
830
831    success, msg = False, "Default to_sql message"
832    start = time.perf_counter()
833    if debug:
834        msg = f"[{self}] Inserting {len(df)} rows with chunksize: {chunksize}..."
835        print(msg, end="", flush=True)
836    stats['num_rows'] = len(df)
837
838    ### Check if the name is too long.
839    truncated_name = truncate_item_name(name, self.flavor)
840    if name != truncated_name:
841        warn(
842            f"Table '{name}' is too long for '{self.flavor}',"
843            + f" will instead create the table '{truncated_name}'."
844        )
845
846    ### filter out non-pandas args
847    import inspect
848    to_sql_params = inspect.signature(df.to_sql).parameters
849    to_sql_kw = {}
850    for k, v in kw.items():
851        if k in to_sql_params:
852            to_sql_kw[k] = v
853
854    to_sql_kw.update({
855        'name': truncated_name,
856        'schema': schema,
857        ('con' if not is_dask else 'uri'): (self.engine if not is_dask else self.URI),
858        'index': index,
859        'if_exists': if_exists,
860        'method': method,
861        'chunksize': chunksize,
862    })
863    if is_dask:
864        to_sql_kw.update({
865            'parallel': True,
866        })
867    elif _connection is not None:
868        to_sql_kw['con'] = _connection
869
870    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
871    if self.flavor == 'oracle':
872        ### For some reason 'replace' doesn't work properly in pandas,
873        ### so try dropping first.
874        if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug):
875            success = self.exec(
876                f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema)
877            ) is not None
878            if not success:
879                warn(f"Unable to drop {name}")
880
881        ### Enforce NVARCHAR(2000) as text instead of CLOB.
882        dtype = to_sql_kw.get('dtype', {})
883        for col, typ in df.dtypes.items():
884            if are_dtypes_equal(str(typ), 'object'):
885                dtype[col] = sqlalchemy.types.NVARCHAR(2000)
886            elif are_dtypes_equal(str(typ), 'int'):
887                dtype[col] = sqlalchemy.types.INTEGER
888        to_sql_kw['dtype'] = dtype
889    elif self.flavor == 'duckdb':
890        dtype = to_sql_kw.get('dtype', {})
891        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
892        for col in dt_cols:
893            df[col] = coerce_timezone(df[col], strip_utc=False)
894    elif self.flavor == 'mssql':
895        dtype = to_sql_kw.get('dtype', {})
896        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
897        new_dtype = {}
898        for col in dt_cols:
899            if col in dtype:
900                continue
901            dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True)
902            if col not in dtype:
903                new_dtype[col] = dt_typ
904
905        dtype.update(new_dtype)
906        to_sql_kw['dtype'] = dtype
907
908    ### Check for JSON columns.
909    if self.flavor not in json_flavors:
910        json_cols = get_json_cols(df)
911        if json_cols:
912            for col in json_cols:
913                df[col] = df[col].apply(
914                    (
915                        lambda x: json.dumps(x, default=str, sort_keys=True)
916                        if not isinstance(x, Hashable)
917                        else x
918                    )
919                )
920
921    ### Check for numeric columns.
922    numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
923    if numeric_precision is not None and numeric_scale is not None:
924        numeric_cols = get_numeric_cols(df)
925        for col in numeric_cols:
926            df[col] = df[col].apply(
927                lambda x: (
928                    quantize_decimal(x, numeric_scale, numeric_precision)
929                    if isinstance(x, Decimal)
930                    else x
931                )
932            )
933
934    if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid':
935        uuid_cols = get_uuid_cols(df)
936        for col in uuid_cols:
937            df[col] = df[col].astype(str)
938
939    try:
940        with warnings.catch_warnings():
941            warnings.filterwarnings('ignore')
942            df.to_sql(**to_sql_kw)
943        success = True
944    except Exception as e:
945        if not silent:
946            warn(str(e))
947        success, msg = False, str(e)
948
949    end = time.perf_counter()
950    if success:
951        msg = f"It took {round(end - start, 2)} seconds to sync {len(df)} rows to {name}."
952    stats['start'] = start
953    stats['end'] = end
954    stats['duration'] = end - start
955
956    if debug:
957        print(f" done.", flush=True)
958        dprint(msg)
959
960    stats['success'] = success
961    stats['msg'] = msg
962    if as_tuple:
963        return success, msg
964    if as_dict:
965        return stats
966    return success

Upload a DataFrame's contents to the SQL server.

Parameters
  • df (pd.DataFrame): The DataFrame to be uploaded.
  • name (str): The name of the table to be created.
  • index (bool, default False): If True, creates the DataFrame's indices as columns.
  • if_exists (str, default 'replace'): Drop and create the table ('replace') or append if it exists ('append') or raise Exception ('fail'). Options are ['replace', 'append', 'fail'].
  • method (str, default ''): Either None or 'multi'. See the pandas to_sql documentation for details.
  • chunksize (Optional[int], default -1): How many rows to insert at a time.
  • schema (Optional[str], default None): Optionally override the schema for the table. Defaults to SQLConnector.schema.
  • as_tuple (bool, default False): If True, return a (success_bool, message) tuple instead of a bool. Defaults to False.
  • as_dict (bool, default False): If True, return a dictionary of transaction information. The keys are success, msg, start, end, duration, num_rows, chunksize, method, and target.
  • kw (Any): Additional arguments will be passed to the DataFrame's to_sql function
Returns
  • Either a bool or a SuccessTuple (depends on as_tuple).
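A round-trip sketch (the connector label and table name are hypothetical):

import pandas as pd
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'main')
df = pd.DataFrame({'id': [1, 2], 'val': ['a', 'b']})

success, msg = conn.to_sql(df, name='my_table', if_exists='replace', as_tuple=True)
print(success, msg)

With as_dict=True, the same call instead returns the transaction stats documented above (success, msg, start, end, duration, num_rows, chunksize, method, and target).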
def exec_queries( self, queries: "List[Union[str, Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]]]", break_on_error: bool = False, rollback: bool = True, silent: bool = False, debug: bool = False) -> 'List[sqlalchemy.engine.cursor.LegacyCursorResult]':
616def exec_queries(
617    self,
618    queries: List[
619        Union[
620            str,
621            Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
622        ]
623    ],
624    break_on_error: bool = False,
625    rollback: bool = True,
626    silent: bool = False,
627    debug: bool = False,
628) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
629    """
630    Execute a list of queries in a single transaction.
631
632    Parameters
633    ----------
634    queries: List[
635        Union[
636            str,
637            Tuple[str, Callable[[], List[str]]]
638        ]
639    ]
640        The queries in the transaction to be executed.
641        If a query is a tuple, the second item of the tuple
642        will be considered a callable hook that returns a list of queries to be executed
643        before the next item in the list.
644
645    break_on_error: bool, default False
646        If `True`, stop executing when a query fails.
647
648    rollback: bool, default True
649        If `break_on_error` is `True`, rollback the transaction if a query fails.
650
651    silent: bool, default False
652        If `True`, suppress warnings.
653
654    Returns
655    -------
656    A list of SQLAlchemy results.
657    """
658    from meerschaum.utils.warnings import warn
659    from meerschaum.utils.debug import dprint
660    from meerschaum.utils.packages import attempt_import
661    sqlalchemy, sqlalchemy_orm = attempt_import('sqlalchemy', 'sqlalchemy.orm')
662    session = sqlalchemy_orm.Session(self.engine)
663
664    result = None
665    results = []
666    with session.begin():
667        for query in queries:
668            hook = None
669            result = None
670
671            if isinstance(query, tuple):
672                query, hook = query
673            if isinstance(query, str):
674                query = sqlalchemy.text(query)
675
676            if debug:
677                dprint(f"[{self}]\n" + str(query))
678
679            try:
680                result = session.execute(query)
681                session.flush()
682            except Exception as e:
683                msg = (f"Encountered error while executing:\n{e}")
684                if not silent:
685                    warn(msg)
686                elif debug:
687                    dprint(f"[{self}]\n" + str(msg))
688                result = None
689            if result is None and break_on_error:
690                if rollback:
691                    session.rollback()
692                break
693            elif result is not None and hook is not None:
694                hook_queries = hook(session)
695                if hook_queries:
696                    hook_results = self.exec_queries(
697                        hook_queries,
698                        break_on_error = break_on_error,
699                        rollback=rollback,
700                        silent=silent,
701                        debug=debug,
702                    )
703                    result = (result, hook_results)
704
705            results.append(result)
706
707    return results

Execute a list of queries in a single transaction.

Parameters
  • queries (List[Union[str, Tuple[str, Callable[[], List[str]]]]]): The queries in the transaction to be executed. If a query is a tuple, the second item of the tuple will be considered a callable hook that returns a list of queries to be executed before the next item in the list.
  • break_on_error (bool, default False): If True, stop executing when a query fails.
  • rollback (bool, default True): If break_on_error is True, rollback the transaction if a query fails.
  • silent (bool, default False): If True, suppress warnings.
Returns
  • A list of SQLAlchemy results.
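A sketch of the tuple-with-hook form (the queries are hypothetical): the hook receives the live session and returns follow-up queries, which run before the next item in the list.

queries = [
    "CREATE TABLE tmp_t (id INTEGER)",
    (
        "INSERT INTO tmp_t (id) VALUES (1)",
        lambda session: ["INSERT INTO tmp_t (id) VALUES (2)"],
    ),
    "DROP TABLE tmp_t",
]
results = conn.exec_queries(queries, break_on_error=True)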
def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
1065def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
1066    """
1067    Return the current alive connection.
1068
1069    Parameters
1070    ----------
1071    rebuild: bool, default False
1072        If `True`, close the previous connection and open a new one.
1073
1074    Returns
1075    -------
1076    A `sqlalchemy.engine.base.Connection` object.
1077    """
1078    import threading
1079    if '_thread_connections' not in self.__dict__:
1080        self.__dict__['_thread_connections'] = {}
1081
1082    self._cleanup_connections()
1083
1084    thread_id = threading.get_ident()
1085
1086    thread_connections = self.__dict__.get('_thread_connections', {})
1087    connection = thread_connections.get(thread_id, None)
1088
1089    if rebuild and connection is not None:
1090        try:
1091            connection.close()
1092        except Exception:
1093            pass
1094
1095        _ = thread_connections.pop(thread_id, None)
1096        connection = None
1097
1098    if connection is None or connection.closed:
1099        connection = self.engine.connect()
1100        thread_connections[thread_id] = connection
1101
1102    return connection

Return the current alive connection.

Parameters
  • rebuild (bool, default False): If True, close the previous connection and open a new one.
Returns
  • A sqlalchemy.engine.base.Connection object.
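Because connections are cached per thread, repeated calls from the same thread hand back the same object (a sketch):

conn_a = conn.get_connection()
conn_b = conn.get_connection()
assert conn_a is conn_b                    # same thread, same cached connection

fresh = conn.get_connection(rebuild=True)  # close and replace this thread's connection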
def test_connection(self, **kw: Any) -> Optional[bool]:
642def test_connection(
643    self,
644    **kw: Any
645) -> Union[bool, None]:
646    """
647    Test if a successful connection to the database may be made.
648
649    Parameters
650    ----------
651    **kw:
652        The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
653
654    Returns
655    -------
656    `True` if a connection is made, otherwise `False` or `None` in case of failure.
657
658    """
659    import warnings
660    from meerschaum.connectors.poll import retry_connect
661    _default_kw = {'max_retries': 1, 'retry_wait': 0, 'warn': False, 'connector': self}
662    _default_kw.update(kw)
663    with warnings.catch_warnings():
664        warnings.filterwarnings('ignore', 'Could not')
665        try:
666            return retry_connect(**_default_kw)
667        except Exception as e:
668            return False

Test if a successful connection to the database may be made.

Parameters
  • **kw: The keyword arguments are passed to meerschaum.connectors.poll.retry_connect.
Returns
  • True if a connection is made, otherwise False or None in case of failure.
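For example, to poll a little longer than the single default attempt (these keyword arguments are forwarded to retry_connect, whose defaults here are max_retries=1 and retry_wait=0):

if conn.test_connection(max_retries=3, retry_wait=1):
    print(f"{conn} is reachable.")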
def fetch( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, str, NoneType] = '', end: Union[datetime.datetime, int, str, NoneType] = None, check_existing: bool = True, chunk_hook: "Optional[Callable[['pd.DataFrame'], Any]]" = None, chunksize: Optional[int] = -1, workers: Optional[int] = None, debug: bool = False, **kw: Any) -> "Union['pd.DataFrame', List[Any], None]":
 17def fetch(
 18    self,
 19    pipe: mrsm.Pipe,
 20    begin: Union[datetime, int, str, None] = '',
 21    end: Union[datetime, int, str, None] = None,
 22    check_existing: bool = True,
 23    chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None,
 24    chunksize: Optional[int] = -1,
 25    workers: Optional[int] = None,
 26    debug: bool = False,
 27    **kw: Any
 28) -> Union['pd.DataFrame', List[Any], None]:
 29    """Execute the SQL definition and return a Pandas DataFrame.
 30
 31    Parameters
 32    ----------
 33    pipe: mrsm.Pipe
 34        The pipe object which contains the `fetch` metadata.
 35
 36        - pipe.columns['datetime']: str
 37            - Name of the datetime column for the remote table.
 38        - pipe.parameters['fetch']: Dict[str, Any]
 39            - Parameters necessary to execute a query.
 40        - pipe.parameters['fetch']['definition']: str
 41            - Raw SQL query to execute to generate the pandas DataFrame.
 42        - pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
 43            - How many minutes before `begin` to search for data (*optional*).
 44
 45    begin: Union[datetime, int, str, None], default ''
 46        Most recent datetime to search for data.
 47        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
 48
 49    end: Union[datetime, int, str, None], default None
 50        The latest datetime to search for data.
 51        If `end` is `None`, do not bound the search.
 52
 53    check_existing: bool, default True
 54        If `False`, use a backtrack interval of 0 minutes.
 55
 56    chunk_hook: Callable[[pd.DataFrame], Any], default None
 57        A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.
 58
 59    chunksize: Optional[int], default -1
 60        How many rows to load into memory at once (when `chunk_hook` is provided).
 61        Otherwise the entire result set is loaded into memory.
 62
 63    workers: Optional[int], default None
 64        How many threads to use when consuming the generator (when `chunk_hook` is provided).
 65        Defaults to the number of cores.
 66
 67    debug: bool, default False
 68        Verbosity toggle.
 69
 70    Returns
 71    -------
 72    A pandas DataFrame or `None`.
 73    If `chunk_hook` is not None, return a list of the hook function's results.
 74    """
 75    meta_def = self.get_pipe_metadef(
 76        pipe,
 77        begin=begin,
 78        end=end,
 79        check_existing=check_existing,
 80        debug=debug,
 81        **kw
 82    )
 83    as_hook_results = chunk_hook is not None
 84    chunks = self.read(
 85        meta_def,
 86        chunk_hook=chunk_hook,
 87        as_hook_results=as_hook_results,
 88        chunksize=chunksize,
 89        workers=workers,
 90        debug=debug,
 91    )
 92    ### if sqlite, parse for datetimes
 93    if not as_hook_results and self.flavor == 'sqlite':
 94        from meerschaum.utils.dataframe import parse_df_datetimes
 95        from meerschaum.utils.dtypes import are_dtypes_equal
 96        ignore_cols = [
 97            col
 98            for col, dtype in pipe.dtypes.items()
 99            if not are_dtypes_equal(str(dtype), 'datetime')
100        ]
101        return (
102            parse_df_datetimes(
103                chunk,
104                ignore_cols=ignore_cols,
105                strip_timezone=(pipe.tzinfo is None),
106                debug=debug,
107            )
108            for chunk in chunks
109        )
110    return chunks

Execute the SQL definition and return a Pandas DataFrame.

Parameters
  • pipe (mrsm.Pipe): The pipe object which contains the fetch metadata.

    • pipe.columns['datetime']: str
      • Name of the datetime column for the remote table.
    • pipe.parameters['fetch']: Dict[str, Any]
      • Parameters necessary to execute a query.
    • pipe.parameters['fetch']['definition']: str
      • Raw SQL query to execute to generate the pandas DataFrame.
    • pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
      • How many minutes before begin to search for data (optional).
  • begin (Union[datetime, int, str, None], default ''): Most recent datetime to search for data. If backtrack_minutes is provided, subtract backtrack_minutes.
  • end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If end is None, do not bound the search.
  • check_existing (bool, default True): If False, use a backtrack interval of 0 minutes.
  • chunk_hook (Callable[[pd.DataFrame], Any], default None): A function to pass to SQLConnector.read() that accepts a Pandas DataFrame.
  • chunksize (Optional[int], default -1): How many rows to load into memory at once (when chunk_hook is provided). Otherwise the entire result set is loaded into memory.
  • workers (Optional[int], default None): How many threads to use when consuming the generator (when chunk_hook is provided). Defaults to the number of cores.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A pandas DataFrame or None.
  • If chunk_hook is not None, return a list of the hook function's results.
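A sketch of a pipe whose source is a SQL definition (the connector label, metric, and table are hypothetical):

import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'main')
pipe = mrsm.Pipe(
    'sql:main', 'demo',
    columns={'datetime': 'dt'},
    parameters={'fetch': {'definition': 'SELECT dt, val FROM my_source_table'}},
)
df = conn.fetch(pipe, begin='2024-01-01')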
def get_pipe_metadef( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, begin: Union[datetime.datetime, int, str, NoneType] = '', end: Union[datetime.datetime, int, str, NoneType] = None, check_existing: bool = True, debug: bool = False, **kw: Any) -> Optional[str]:
113def get_pipe_metadef(
114    self,
115    pipe: mrsm.Pipe,
116    params: Optional[Dict[str, Any]] = None,
117    begin: Union[datetime, int, str, None] = '',
118    end: Union[datetime, int, str, None] = None,
119    check_existing: bool = True,
120    debug: bool = False,
121    **kw: Any
122) -> Union[str, None]:
123    """
124    Return a pipe's meta definition fetch query.
125
126    params: Optional[Dict[str, Any]], default None
127        Optional params dictionary to build the `WHERE` clause.
128        See `meerschaum.utils.sql.build_where`.
129
130    begin: Union[datetime, int, str, None], default ''
131        Most recent datetime to search for data.
132        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
133
134    end: Union[datetime, int, str, None], default None
135        The latest datetime to search for data.
136        If `end` is `None`, do not bound the search.
137
138    check_existing: bool, default True
139        If `True`, apply the backtrack interval.
140
141    debug: bool, default False
142        Verbosity toggle.
143
144    Returns
145    -------
146    A pipe's meta definition fetch query string.
147    """
148    from meerschaum.utils.debug import dprint
149    from meerschaum.utils.warnings import warn, error
150    from meerschaum.utils.sql import sql_item_name, dateadd_str, build_where
151    from meerschaum.utils.misc import is_int
152    from meerschaum.config import get_config
153
154    definition = get_pipe_query(pipe)
155
156    if not pipe.columns.get('datetime', None):
157        _dt = pipe.guess_datetime()
158        dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None
159        is_guess = True
160    else:
161        _dt = pipe.get_columns('datetime')
162        dt_name = sql_item_name(_dt, self.flavor, None)
163        is_guess = False
164
165    if begin not in (None, '') or end is not None:
166        if is_guess:
167            if _dt is None:
168                warn(
169                    f"Unable to determine a datetime column for {pipe}."
170                    + "\n    Ignoring begin and end...",
171                    stack = False,
172                )
173                begin, end = '', None
174            else:
175                warn(
176                    f"A datetime wasn't specified for {pipe}.\n"
177                    + f"    Using column \"{_dt}\" for datetime bounds...",
178                    stack = False
179                )
180
181    apply_backtrack = begin == '' and check_existing
182    backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug)
183    btm = (
184        int(backtrack_interval.total_seconds() / 60)
185        if isinstance(backtrack_interval, timedelta)
186        else backtrack_interval
187    )
188    begin = (
189        pipe.get_sync_time(debug=debug)
190        if begin == ''
191        else begin
192    )
193
194    if begin and end and begin >= end:
195        begin = None
196
197    if dt_name:
198        begin_da = (
199            dateadd_str(
200                flavor=self.flavor,
201                datepart='minute',
202                number=((-1 * btm) if apply_backtrack else 0),
203                begin=begin,
204            )
205            if begin
206            else None
207        )
208        end_da = (
209            dateadd_str(
210                flavor=self.flavor,
211                datepart='minute',
212                number=0,
213                begin=end,
214            )
215            if end
216            else None
217        )
218
219    meta_def = (
220        _simple_fetch_query(pipe, self.flavor) if (
221            (not (pipe.columns or {}).get('id', None))
222            or (not get_config('system', 'experimental', 'join_fetch'))
223        ) else _join_fetch_query(pipe, self.flavor, debug=debug, **kw)
224    )
225
226    has_where = 'where' in meta_def.lower()[meta_def.lower().rfind('definition'):]
227    if dt_name and (begin_da or end_da):
228        definition_dt_name = (
229            dateadd_str(self.flavor, 'minute', 0, f"definition.{dt_name}")
230            if not is_int((begin_da or end_da))
231            else f"definition.{dt_name}"
232        )
233        meta_def += "\n" + ("AND" if has_where else "WHERE") + " "
234        has_where = True
235        if begin_da:
236            meta_def += f"{definition_dt_name} >= {begin_da}"
237        if begin_da and end_da:
238            meta_def += " AND "
239        if end_da:
240            meta_def += f"{definition_dt_name} < {end_da}"
241
242    if params is not None:
243        params_where = build_where(params, self, with_where=False)
244        meta_def += "\n" + ("AND" if has_where else "WHERE") + " "
245        has_where = True
246        meta_def += params_where
247
248    return meta_def

Return a pipe's meta definition fetch query.

params: Optional[Dict[str, Any]], default None Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.

begin: Union[datetime, int, str, None], default '' Most recent datetime to search for data. If backtrack_minutes is provided, subtract backtrack_minutes.

end: Union[datetime, int, str, None], default None The latest datetime to search for data. If end is None, do not bound the search.

check_existing: bool, default True If True, apply the backtrack interval.

debug: bool, default False Verbosity toggle.

Returns
  • A pipe's meta definition fetch query string.
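A quick sketch of building (but not executing) the bounded fetch query; the pipe and params below are hypothetical:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe('sql:main', 'weather')  # hypothetical pipe with a fetch definition

meta_def = conn.get_pipe_metadef(
    pipe,
    begin='2024-01-01',
    end='2024-02-01',
    params={'station': 'KATL'},  # hypothetical column filter
)
print(meta_def)  # the definition wrapped with WHERE / AND bounds
```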
def cli(self, debug: bool = False) -> Tuple[bool, str]:
35def cli(
36        self,
37        debug: bool = False,
38    ) -> SuccessTuple:
39    """
40    Launch a subprocess for an interactive CLI.
41    """
42    from meerschaum.utils.venv import venv_exec
43    env = copy.deepcopy(dict(os.environ))
44    env[f'MRSM_SQL_{self.label.upper()}'] = json.dumps(self.meta)
45    cli_code = (
46        "import sys\n"
47        "import meerschaum as mrsm\n"
48        f"conn = mrsm.get_connector('sql:{self.label}')\n"
49        "success, msg = conn._cli_exit()\n"
50        "mrsm.pprint((success, msg))\n"
51        "if not success:\n"
52        "    raise Exception(msg)"
53    )
54    try:
55        _ = venv_exec(cli_code, venv=None, debug=debug, capture_output=False)
56    except Exception as e:
57        return False, f"[{self}] Failed to start CLI:\n{e}"
58    return True, "Success"

Launch a subprocess for an interactive CLI.
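For instance, assuming the flavor's CLI dependencies are installed, a sketch of dropping into the interactive shell:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'main')
success, msg = conn.cli()  # launches the flavor's interactive SQL shell
if not success:
    print(msg)
```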

def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False) -> Optional[List[Tuple[str, str, Optional[str]]]]:
144def fetch_pipes_keys(
145    self,
146    connector_keys: Optional[List[str]] = None,
147    metric_keys: Optional[List[str]] = None,
148    location_keys: Optional[List[str]] = None,
149    tags: Optional[List[str]] = None,
150    params: Optional[Dict[str, Any]] = None,
151    debug: bool = False
152) -> Optional[List[Tuple[str, str, Optional[str]]]]:
153    """
154    Return a list of tuples corresponding to the parameters provided.
155
156    Parameters
157    ----------
158    connector_keys: Optional[List[str]], default None
159        List of connector_keys to search by.
160
161    metric_keys: Optional[List[str]], default None
162        List of metric_keys to search by.
163
164    location_keys: Optional[List[str]], default None
165        List of location_keys to search by.
166
167    params: Optional[Dict[str, Any]], default None
168        Dictionary of additional parameters to search by.
169        E.g. `--params pipe_id:1`
170
171    debug: bool, default False
172        Verbosity toggle.
173    """
174    from meerschaum.utils.debug import dprint
175    from meerschaum.utils.packages import attempt_import
176    from meerschaum.utils.misc import separate_negation_values, flatten_list
177    from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
178    from meerschaum.config.static import STATIC_CONFIG
179    import json
180    from copy import deepcopy
181    sqlalchemy, sqlalchemy_sql_functions = attempt_import('sqlalchemy', 'sqlalchemy.sql.functions')
182    coalesce = sqlalchemy_sql_functions.coalesce
183
184    if connector_keys is None:
185        connector_keys = []
186    if metric_keys is None:
187        metric_keys = []
188    if location_keys is None:
189        location_keys = []
190    else:
191        location_keys = [
192            (
193                lk
194                if lk not in ('[None]', 'None', 'null')
195                else 'None'
196            )
197            for lk in location_keys
198        ]
199    if tags is None:
200        tags = []
201
202    if params is None:
203        params = {}
204
205    ### Add three primary keys to params dictionary
206    ###   (separated for convenience of arguments).
207    cols = {
208        'connector_keys': [str(ck) for ck in connector_keys],
209        'metric_key': [str(mk) for mk in metric_keys],
210        'location_key': [str(lk) for lk in location_keys],
211    }
212
213    ### Make deep copy so we don't mutate this somewhere else.
214    parameters = deepcopy(params)
215    for col, vals in cols.items():
216        if vals not in [[], ['*']]:
217            parameters[col] = vals
218
219    if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug):
220        return []
221
222    from meerschaum.connectors.sql.tables import get_tables
223    pipes_tbl = get_tables(mrsm_instance=self, create=False, debug=debug)['pipes']
224
225    _params = {}
226    for k, v in parameters.items():
227        _v = json.dumps(v) if isinstance(v, dict) else v
228        _params[k] = _v
229
230    negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix']
231    ### Parse regular params.
232    ### If a param begins with '_', negate it instead.
233    _where = [
234        (
235            (coalesce(pipes_tbl.c[key], 'None') == val)
236            if not str(val).startswith(negation_prefix)
237            else (coalesce(pipes_tbl.c[key], 'None') != str(val)[len(negation_prefix):])
238        ) for key, val in _params.items()
239        if not isinstance(val, (list, tuple)) and key in pipes_tbl.c
240    ]
241    select_cols = (
242        [
243            pipes_tbl.c.connector_keys,
244            pipes_tbl.c.metric_key,
245            pipes_tbl.c.location_key,
246        ]
247    )
248
249    q = sqlalchemy.select(*select_cols).where(sqlalchemy.and_(True, *_where))
250    for c, vals in cols.items():
251        if not isinstance(vals, (list, tuple)) or not vals or c not in pipes_tbl.c:
252            continue
253        _in_vals, _ex_vals = separate_negation_values(vals)
254        q = q.where(coalesce(pipes_tbl.c[c], 'None').in_(_in_vals)) if _in_vals else q
255        q = q.where(coalesce(pipes_tbl.c[c], 'None').not_in(_ex_vals)) if _ex_vals else q
256
257    ### Finally, parse tags.
258    tag_groups = [tag.split(',') for tag in tags]
259    in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]
260
261    ors, nands = [], []
262    for _in_tags, _ex_tags in in_ex_tag_groups:
263        sub_ands = []
264        for nt in _in_tags:
265            sub_ands.append(
266                sqlalchemy.cast(
267                    pipes_tbl.c['parameters'],
268                    sqlalchemy.String,
269                ).like(f'%"tags":%"{nt}"%')
270            )
271        if sub_ands:
272            ors.append(sqlalchemy.and_(*sub_ands))
273
274        for xt in _ex_tags:
275            nands.append(
276                sqlalchemy.cast(
277                    pipes_tbl.c['parameters'],
278                    sqlalchemy.String,
279                ).not_like(f'%"tags":%"{xt}"%')
280            )
281
282    q = q.where(sqlalchemy.and_(*nands)) if nands else q
283    q = q.where(sqlalchemy.or_(*ors)) if ors else q
284    loc_asc = sqlalchemy.asc(pipes_tbl.c['location_key'])
285    if self.flavor not in OMIT_NULLSFIRST_FLAVORS:
286        loc_asc = sqlalchemy.nullsfirst(loc_asc)
287    q = q.order_by(
288        sqlalchemy.asc(pipes_tbl.c['connector_keys']),
289        sqlalchemy.asc(pipes_tbl.c['metric_key']),
290        loc_asc,
291    )
292
293    ### execute the query and return a list of tuples
294    if debug:
295        dprint(q.compile(compile_kwargs={'literal_binds': True}))
296    try:
297        rows = (
298            self.execute(q).fetchall()
299            if self.flavor != 'duckdb'
300            else [
301                (row['connector_keys'], row['metric_key'], row['location_key'])
302                for row in self.read(q).to_dict(orient='records')
303            ]
304        )
305    except Exception as e:
306        from meerschaum.utils.warnings import error
307        error(str(e))
308    return [(row[0], row[1], row[2]) for row in rows]

Return a list of tuples corresponding to the parameters provided.

Parameters
  • connector_keys (Optional[List[str]], default None): List of connector_keys to search by.
  • metric_keys (Optional[List[str]], default None): List of metric_keys to search by.
  • location_keys (Optional[List[str]], default None): List of location_keys to search by.
  • tags (Optional[List[str]], default None): List of tags to search by.
  • params (Optional[Dict[str, Any]], default None): Dictionary of additional parameters to search by. E.g. --params pipe_id:1
  • debug (bool, default False): Verbosity toggle.
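A sketch of filtering registered pipes; the connector keys and tag below are hypothetical:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql')  # an instance connector

### Rows come back as (connector_keys, metric_key, location_key) tuples.
keys = conn.fetch_pipes_keys(
    connector_keys=['plugin:noaa'],  # hypothetical connector
    tags=['production'],             # hypothetical tag
)
for ck, mk, lk in keys:
    print(ck, mk, lk)
```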
def create_indices( self, pipe: meerschaum.Pipe, indices: Optional[List[str]] = None, debug: bool = False) -> bool:
311def create_indices(
312    self,
313    pipe: mrsm.Pipe,
314    indices: Optional[List[str]] = None,
315    debug: bool = False
316) -> bool:
317    """
318    Create a pipe's indices.
319    """
320    from meerschaum.utils.sql import sql_item_name, update_queries
321    from meerschaum.utils.debug import dprint
322    if debug:
323        dprint(f"Creating indices for {pipe}...")
324    if not pipe.indices:
325        warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
326        return True
327
328    _ = pipe.__dict__.pop('_columns_indices', None)
329    ix_queries = {
330        ix: queries
331        for ix, queries in self.get_create_index_queries(pipe, debug=debug).items()
332        if indices is None or ix in indices
333    }
334    success = True
335    for ix, queries in ix_queries.items():
336        ix_success = all(self.exec_queries(queries, debug=debug, silent=False))
337        success = success and ix_success
338        if not ix_success:
339            warn(f"Failed to create index on column: {ix}")
340
341    return success

Create a pipe's indices.

def drop_indices( self, pipe: meerschaum.Pipe, indices: Optional[List[str]] = None, debug: bool = False) -> bool:
344def drop_indices(
345    self,
346    pipe: mrsm.Pipe,
347    indices: Optional[List[str]] = None,
348    debug: bool = False
349) -> bool:
350    """
351    Drop a pipe's indices.
352    """
353    from meerschaum.utils.debug import dprint
354    if debug:
355        dprint(f"Dropping indices for {pipe}...")
356    if not pipe.columns:
357        warn(f"Unable to drop indices for {pipe} without columns.", stack=False)
358        return False
359    ix_queries = {
360        ix: queries
361        for ix, queries in self.get_drop_index_queries(pipe, debug=debug).items()
362        if indices is None or ix in indices
363    }
364    success = True
365    for ix, queries in ix_queries.items():
366        ix_success = all(self.exec_queries(queries, debug=debug, silent=True))
367        if not ix_success:
368            success = False
369            if debug:
370                dprint(f"Failed to drop index on column: {ix}")
371    return success

Drop a pipe's indices.
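The two methods mirror each other; here is a sketch of rebuilding a single index for a hypothetical pipe:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe(
    'plugin:noaa', 'weather',  # hypothetical pipe
    columns={'datetime': 'timestamp', 'id': 'station'},
)

### Rebuild only the datetime index; pass `indices=None` to rebuild all.
dropped = conn.drop_indices(pipe, indices=['datetime'])
created = conn.create_indices(pipe, indices=['datetime'])
```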

def get_create_index_queries( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[str]]:
374def get_create_index_queries(
375    self,
376    pipe: mrsm.Pipe,
377    debug: bool = False,
378) -> Dict[str, List[str]]:
379    """
380    Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.
381
382    Parameters
383    ----------
384    pipe: mrsm.Pipe
385        The pipe to which the queries will correspond.
386
387    Returns
388    -------
389    A dictionary of index names mapping to lists of queries.
390    """
391    ### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly.
392    if self.flavor == 'duckdb':
393        return {}
394    from meerschaum.utils.sql import (
395        sql_item_name,
396        get_distinct_col_count,
397        update_queries,
398        get_null_replacement,
399        get_create_table_queries,
400        get_rename_table_queries,
401        COALESCE_UNIQUE_INDEX_FLAVORS,
402    )
403    from meerschaum.utils.dtypes.sql import (
404        get_db_type_from_pd_type,
405        get_pd_type_from_db_type,
406        AUTO_INCREMENT_COLUMN_FLAVORS,
407    )
408    from meerschaum.config import get_config
409    index_queries = {}
410
411    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
412    static = pipe.parameters.get('static', False)
413    index_names = pipe.get_indices()
414    indices = pipe.indices
415    existing_cols_types = pipe.get_columns_types(debug=debug)
416    existing_cols_pd_types = {
417        col: get_pd_type_from_db_type(typ)
418        for col, typ in existing_cols_types.items()
419    }
420    existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
421    existing_ix_names = set()
422    existing_primary_keys = []
423    for col, col_indices in existing_cols_indices.items():
424        for col_ix_doc in col_indices:
425            existing_ix_names.add(col_ix_doc.get('name', None))
426            if col_ix_doc.get('type', None) == 'PRIMARY KEY':
427                existing_primary_keys.append(col)
428
429    _datetime = pipe.get_columns('datetime', error=False)
430    _datetime_name = (
431        sql_item_name(_datetime, self.flavor, None)
432        if _datetime is not None else None
433    )
434    _datetime_index_name = (
435        sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
436        if index_names.get('datetime', None)
437        else None
438    )
439    _id = pipe.get_columns('id', error=False)
440    _id_name = (
441        sql_item_name(_id, self.flavor, None)
442        if _id is not None
443        else None
444    )
445    primary_key = pipe.columns.get('primary', None)
446    primary_key_name = (
447        sql_item_name(primary_key, flavor=self.flavor, schema=None)
448        if primary_key
449        else None
450    )
451    autoincrement = (
452        pipe.parameters.get('autoincrement', False)
453        or (
454            primary_key is not None
455            and primary_key not in existing_cols_pd_types
456        )
457    )
458    primary_key_db_type = (
459        get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int'), self.flavor)
460        if primary_key
461        else None
462    )
463    primary_key_constraint_name = (
464        sql_item_name(f'pk_{pipe.target}', self.flavor, None)
465        if primary_key is not None
466        else None
467    )
468
469    _id_index_name = (
470        sql_item_name(index_names['id'], self.flavor, None)
471        if index_names.get('id', None)
472        else None
473    )
474    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
475    _create_space_partition = get_config('system', 'experimental', 'space')
476
477    ### create datetime index
478    if _datetime is not None:
479        if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
480            _id_count = (
481                get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
482                if (_id is not None and _create_space_partition) else None
483            )
484
485            chunk_interval = pipe.get_chunk_interval(debug=debug)
486            chunk_interval_minutes = (
487                chunk_interval
488                if isinstance(chunk_interval, int)
489                else int(chunk_interval.total_seconds() / 60)
490            )
491            chunk_time_interval = (
492                f"INTERVAL '{chunk_interval_minutes} MINUTES'"
493                if isinstance(chunk_interval, timedelta)
494                else f'{chunk_interval_minutes}'
495            )
496
497            dt_query = (
498                f"SELECT public.create_hypertable('{_pipe_name}', " +
499                f"'{_datetime}', "
500                + (
501                    f"'{_id}', {_id_count}, " if (_id is not None and _create_space_partition)
502                    else ''
503                )
504                + f'chunk_time_interval => {chunk_time_interval}, '
505                + 'if_not_exists => true, '
506                + "migrate_data => true);"
507            )
508        elif self.flavor == 'mssql':
509            dt_query = (
510                "CREATE "
511                + ("CLUSTERED " if not primary_key else '')
512                + f"INDEX {_datetime_index_name} "
513                + f"ON {_pipe_name} ({_datetime_name})"
514            )
515        else: ### sqlite, etc.
516            dt_query = (
517                f"CREATE INDEX {_datetime_index_name} "
518                + f"ON {_pipe_name} ({_datetime_name})"
519            )
520
521        index_queries[_datetime] = [dt_query]
522
523    primary_queries = []
524    if (
525        primary_key is not None
526        and primary_key not in existing_primary_keys
527        and not static
528    ):
529        if autoincrement and primary_key not in existing_cols_pd_types:
530            autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
531                self.flavor,
532                AUTO_INCREMENT_COLUMN_FLAVORS['default']
533            )
534            primary_queries.extend([
535                (
536                    f"ALTER TABLE {_pipe_name}\n"
537                    f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
538                ),
539            ])
540        elif not autoincrement and primary_key in existing_cols_pd_types:
541            if self.flavor == 'sqlite':
542                new_table_name = sql_item_name(
543                    f'_new_{pipe.target}',
544                    self.flavor,
545                    self.get_pipe_schema(pipe)
546                )
547                select_cols_str = ', '.join(
548                    [
549                        sql_item_name(col, self.flavor, None)
550                        for col in existing_cols_types
551                    ]
552                )
553                primary_queries.extend(
554                    get_create_table_queries(
555                        existing_cols_pd_types,
556                        f'_new_{pipe.target}',
557                        self.flavor,
558                        schema=self.get_pipe_schema(pipe),
559                        primary_key=primary_key,
560                    ) + [
561                        (
562                            f"INSERT INTO {new_table_name} ({select_cols_str})\n"
563                            f"SELECT {select_cols_str}\nFROM {_pipe_name}"
564                        ),
565                        f"DROP TABLE {_pipe_name}",
566                    ] + get_rename_table_queries(
567                        f'_new_{pipe.target}',
568                        pipe.target,
569                        self.flavor,
570                        schema=self.get_pipe_schema(pipe),
571                    )
572                )
573            elif self.flavor == 'oracle':
574                primary_queries.extend([
575                    (
576                        f"ALTER TABLE {_pipe_name}\n"
577                        f"MODIFY {primary_key_name} NOT NULL"
578                    ),
579                    (
580                        f"ALTER TABLE {_pipe_name}\n"
581                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
582                    )
583                ])
584            elif self.flavor in ('mysql', 'mariadb'):
585                primary_queries.extend([
586                    (
587                        f"ALTER TABLE {_pipe_name}\n"
588                        f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
589                    ),
590                    (
591                        f"ALTER TABLE {_pipe_name}\n"
592                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
593                    )
594                ])
595            elif self.flavor == 'timescaledb':
596                primary_queries.extend([
597                    (
598                        f"ALTER TABLE {_pipe_name}\n"
599                        f"ALTER COLUMN {primary_key_name} SET NOT NULL"
600                    ),
601                    (
602                        f"ALTER TABLE {_pipe_name}\n"
603                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
604                            f"{_datetime_name}, " if _datetime_name else ""
605                        ) + f"{primary_key_name})"
606                    ),
607                ])
608            elif self.flavor in ('citus', 'postgresql', 'duckdb'):
609                primary_queries.extend([
610                    (
611                        f"ALTER TABLE {_pipe_name}\n"
612                        f"ALTER COLUMN {primary_key_name} SET NOT NULL"
613                    ),
614                    (
615                        f"ALTER TABLE {_pipe_name}\n"
616                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
617                    ),
618                ])
619            else:
620                primary_queries.extend([
621                    (
622                        f"ALTER TABLE {_pipe_name}\n"
623                        f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
624                    ),
625                    (
626                        f"ALTER TABLE {_pipe_name}\n"
627                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
628                    ),
629                ])
630        index_queries[primary_key] = primary_queries
631
632    ### create id index
633    if _id_name is not None:
634        if self.flavor == 'timescaledb':
635            ### Already created indices via create_hypertable.
636            id_query = (
637                None if (_id is not None and _create_space_partition)
638                else (
639                    f"CREATE INDEX IF NOT EXISTS {_id_index_name} ON {_pipe_name} ({_id_name})"
640                    if _id is not None
641                    else None
642                )
643            )
644
645        else: ### mssql, sqlite, etc.
646            id_query = f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})"
647
648        if id_query is not None:
649            index_queries[_id] = id_query if isinstance(id_query, list) else [id_query]
650
651    ### Create indices for other labels in `pipe.columns`.
652    other_index_names = {
653        ix_key: ix_unquoted
654        for ix_key, ix_unquoted in index_names.items()
655        if ix_key not in ('datetime', 'id', 'primary') and ix_unquoted not in existing_ix_names
656    }
657    for ix_key, ix_unquoted in other_index_names.items():
658        ix_name = sql_item_name(ix_unquoted, self.flavor, None)
659        cols = indices[ix_key]
660        if not isinstance(cols, (list, tuple)):
661            cols = [cols]
662        cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
663        if not cols_names:
664            continue
665        cols_names_str = ", ".join(cols_names)
666        index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]
667
668    indices_cols_str = ', '.join(
669        list({
670            sql_item_name(ix, self.flavor)
671            for ix_key, ix in pipe.columns.items()
672            if ix and ix in existing_cols_types
673        })
674    )
675    coalesce_indices_cols_str = ', '.join(
676        [
677            (
678                "COALESCE("
679                + sql_item_name(ix, self.flavor)
680                + ", "
681                + get_null_replacement(existing_cols_types[ix], self.flavor)
682                + ") "
683            ) if ix_key != 'datetime' else (sql_item_name(ix, self.flavor))
684            for ix_key, ix in pipe.columns.items()
685            if ix and ix in existing_cols_types
686        ]
687    )
688    unique_index_name = sql_item_name(pipe.target + '_unique_index', self.flavor)
689    constraint_name = sql_item_name(pipe.target + '_constraint', self.flavor)
690    add_constraint_query = (
691        f"ALTER TABLE {_pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})"
692    )
693    unique_index_cols_str = (
694        indices_cols_str
695        if self.flavor not in COALESCE_UNIQUE_INDEX_FLAVORS
696        else coalesce_indices_cols_str
697    )
698    create_unique_index_query = (
699        f"CREATE UNIQUE INDEX {unique_index_name} ON {_pipe_name} ({unique_index_cols_str})"
700    )
701    constraint_queries = [create_unique_index_query]
702    if self.flavor != 'sqlite':
703        constraint_queries.append(add_constraint_query)
704    if upsert and indices_cols_str:
705        index_queries[unique_index_name] = constraint_queries
706    return index_queries

Return a dictionary mapping columns to a CREATE INDEX or equivalent query.

Parameters
  • pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
  • A dictionary of index names mapping to lists of queries.
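A sketch of inspecting the flavor-specific queries before executing them (the pipe below is hypothetical):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe(
    'plugin:noaa', 'weather',  # hypothetical pipe
    columns={'datetime': 'timestamp', 'id': 'station'},
)

### Map of index names to the queries which would create them.
for ix_name, queries in conn.get_create_index_queries(pipe).items():
    for query in queries:
        print(ix_name, '->', query)
```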
def get_drop_index_queries( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[str]]:
709def get_drop_index_queries(
710    self,
711    pipe: mrsm.Pipe,
712    debug: bool = False,
713) -> Dict[str, List[str]]:
714    """
715    Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.
716
717    Parameters
718    ----------
719    pipe: mrsm.Pipe
720        The pipe to which the queries will correspond.
721
722    Returns
723    -------
724    A dictionary of column names mapping to lists of queries.
725    """
726    ### NOTE: Due to breaking changes within DuckDB, indices must be skipped.
727    if self.flavor == 'duckdb':
728        return {}
729    if not pipe.exists(debug=debug):
730        return {}
731    from meerschaum.utils.sql import (
732        sql_item_name,
733        table_exists,
734        hypertable_queries,
735        DROP_IF_EXISTS_FLAVORS,
736    )
737    drop_queries = {}
738    schema = self.get_pipe_schema(pipe)
739    schema_prefix = (schema + '_') if schema else ''
740    indices = {
741        col: schema_prefix + ix
742        for col, ix in pipe.get_indices().items()
743    }
744    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
745    pipe_name_no_schema = sql_item_name(pipe.target, self.flavor, None)
746
747    if self.flavor not in hypertable_queries:
748        is_hypertable = False
749    else:
750        is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name)
751        is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None
752
753    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
754    if is_hypertable:
755        nuke_queries = []
756        temp_table = '_' + pipe.target + '_temp_migration'
757        temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))
758
759        if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
760            nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}")
761        nuke_queries += [
762            f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
763            f"DROP TABLE {if_exists_str} {pipe_name}",
764            f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}",
765        ]
766        nuke_ix_keys = ('datetime', 'id')
767        nuked = False
768        for ix_key in nuke_ix_keys:
769            if ix_key in indices and not nuked:
770                drop_queries[ix_key] = nuke_queries
771                nuked = True
772
773    drop_queries.update({
774        ix_key: ["DROP INDEX " + sql_item_name(ix_unquoted, self.flavor, None)]
775        for ix_key, ix_unquoted in indices.items()
776        if ix_key not in drop_queries
777    })
778    return drop_queries

Return a dictionary mapping columns to a DROP INDEX or equivalent query.

Parameters
  • pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
  • A dictionary of column names mapping to lists of queries.
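Together with get_create_index_queries(), this supports a full drop-and-rebuild (the same pattern get_add_columns_queries() applies for DuckDB). A sketch, assuming a hypothetical pipe:

```
import meerschaum as mrsm
from meerschaum.utils.misc import flatten_list

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe('plugin:noaa', 'weather')  # hypothetical pipe

drop_queries = list(flatten_list(
    [queries for queries in conn.get_drop_index_queries(pipe).values()]
))
create_queries = list(flatten_list(
    [queries for queries in conn.get_create_index_queries(pipe).values()]
))
success = all(conn.exec_queries(drop_queries + create_queries, silent=False))
```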
def get_add_columns_queries( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, Dict[str, str]]', _is_db_types: bool = False, debug: bool = False) -> List[str]:
2827def get_add_columns_queries(
2828    self,
2829    pipe: mrsm.Pipe,
2830    df: Union[pd.DataFrame, Dict[str, str]],
2831    _is_db_types: bool = False,
2832    debug: bool = False,
2833) -> List[str]:
2834    """
2835    Add new null columns of the correct type to a table from a dataframe.
2836
2837    Parameters
2838    ----------
2839    pipe: mrsm.Pipe
2840        The pipe to be altered.
2841
2842    df: Union[pd.DataFrame, Dict[str, str]]
2843        The pandas DataFrame which contains new columns.
2844        If a dictionary is provided, assume it maps columns to Pandas data types.
2845
2846    _is_db_types: bool, default False
2847        If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.
2848
2849    Returns
2850    -------
2851    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
2852    """
2853    if not pipe.exists(debug=debug):
2854        return []
2855
2856    if pipe.parameters.get('static', False):
2857        return []
2858
2859    from decimal import Decimal
2860    import copy
2861    from meerschaum.utils.sql import (
2862        sql_item_name,
2863        SINGLE_ALTER_TABLE_FLAVORS,
2864        get_table_cols_types,
2865    )
2866    from meerschaum.utils.dtypes.sql import (
2867        get_pd_type_from_db_type,
2868        get_db_type_from_pd_type,
2869    )
2870    from meerschaum.utils.misc import flatten_list
2871    table_obj = self.get_pipe_table(pipe, debug=debug)
2872    is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False
2873    if is_dask:
2874        df = df.partitions[0].compute()
2875    df_cols_types = (
2876        {
2877            col: str(typ)
2878            for col, typ in df.dtypes.items()
2879        }
2880        if not isinstance(df, dict)
2881        else copy.deepcopy(df)
2882    )
2883    if not isinstance(df, dict) and len(df.index) > 0:
2884        for col, typ in list(df_cols_types.items()):
2885            if typ != 'object':
2886                continue
2887            val = df.iloc[0][col]
2888            if isinstance(val, (dict, list)):
2889                df_cols_types[col] = 'json'
2890            elif isinstance(val, Decimal):
2891                df_cols_types[col] = 'numeric'
2892            elif isinstance(val, str):
2893                df_cols_types[col] = 'str'
2894    db_cols_types = {
2895        col: get_pd_type_from_db_type(str(typ.type))
2896        for col, typ in table_obj.columns.items()
2897    } if table_obj is not None else {
2898        col: get_pd_type_from_db_type(typ)
2899        for col, typ in get_table_cols_types(
2900            pipe.target,
2901            self,
2902            schema=self.get_pipe_schema(pipe),
2903            debug=debug,
2904        ).items()
2905    }
2906    new_cols = set(df_cols_types) - set(db_cols_types)
2907    if not new_cols:
2908        return []
2909
2910    new_cols_types = {
2911        col: get_db_type_from_pd_type(
2912            df_cols_types[col],
2913            self.flavor
2914        ) for col in new_cols
2915    }
2916
2917    alter_table_query = "ALTER TABLE " + sql_item_name(
2918        pipe.target, self.flavor, self.get_pipe_schema(pipe)
2919    )
2920    queries = []
2921    for col, typ in new_cols_types.items():
2922        add_col_query = (
2923            "\nADD "
2924            + sql_item_name(col, self.flavor, None)
2925            + " " + typ + ","
2926        )
2927
2928        if self.flavor in SINGLE_ALTER_TABLE_FLAVORS:
2929            queries.append(alter_table_query + add_col_query[:-1])
2930        else:
2931            alter_table_query += add_col_query
2932
2933    ### For most flavors, only one query is required.
2934    ### This covers SQLite which requires one query per column.
2935    if not queries:
2936        queries.append(alter_table_query[:-1])
2937
2938    if self.flavor != 'duckdb':
2939        return queries
2940
2941    ### NOTE: For DuckDB, we must drop and rebuild the indices.
2942    drop_index_queries = list(flatten_list(
2943        [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()]
2944    ))
2945    create_index_queries = list(flatten_list(
2946        [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()]
2947    ))
2948
2949    return drop_index_queries + queries + create_index_queries

Add new null columns of the correct type to a table from a dataframe.

Parameters
  • pipe (mrsm.Pipe): The pipe to be altered.
  • df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which contains new columns. If a dictionary is provided, assume it maps columns to Pandas data types.
  • _is_db_types (bool, default False): If True, assume df is a dictionary mapping columns to SQL native dtypes.
Returns
  • A list of the ALTER TABLE SQL query or queries to be executed on the provided connector.
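A sketch with a hypothetical new column:

```
import meerschaum as mrsm
import pandas as pd

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe('plugin:noaa', 'weather')  # hypothetical pipe

### A DataFrame (or a {column: dtype} dict) containing a column the table lacks.
df = pd.DataFrame({'humidity': [0.42]})  # hypothetical new column
queries = conn.get_add_columns_queries(pipe, df)
if queries:
    success = all(conn.exec_queries(queries, debug=False))
```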
def get_alter_columns_queries( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, Dict[str, str]]', debug: bool = False) -> List[str]:
2952def get_alter_columns_queries(
2953    self,
2954    pipe: mrsm.Pipe,
2955    df: Union[pd.DataFrame, Dict[str, str]],
2956    debug: bool = False,
2957) -> List[str]:
2958    """
2959    If we encounter a column of a different type, set the entire column to text.
2960    If the altered columns are numeric, alter to numeric instead.
2961
2962    Parameters
2963    ----------
2964    pipe: mrsm.Pipe
2965        The pipe to be altered.
2966
2967    df: Union[pd.DataFrame, Dict[str, str]]
2968        The pandas DataFrame which may contain altered columns.
2969        If a dict is provided, assume it maps columns to Pandas data types.
2970
2971    Returns
2972    -------
2973    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
2974    """
2975    if not pipe.exists(debug=debug):
2976        return []
2977    if pipe.static:
2978        return []
2979    from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types
2980    from meerschaum.utils.dataframe import get_numeric_cols
2981    from meerschaum.utils.dtypes import are_dtypes_equal
2982    from meerschaum.utils.dtypes.sql import (
2983        get_pd_type_from_db_type,
2984        get_db_type_from_pd_type,
2985    )
2986    from meerschaum.utils.misc import flatten_list, generate_password, items_str
2987    table_obj = self.get_pipe_table(pipe, debug=debug)
2988    target = pipe.target
2989    session_id = generate_password(3)
2990    numeric_cols = (
2991        get_numeric_cols(df)
2992        if not isinstance(df, dict)
2993        else [
2994            col
2995            for col, typ in df.items()
2996            if typ == 'numeric'
2997        ]
2998    )
2999    df_cols_types = (
3000        {
3001            col: str(typ)
3002            for col, typ in df.dtypes.items()
3003        }
3004        if not isinstance(df, dict)
3005        else df
3006    )
3007    db_cols_types = {
3008        col: get_pd_type_from_db_type(str(typ.type))
3009        for col, typ in table_obj.columns.items()
3010    } if table_obj is not None else {
3011        col: get_pd_type_from_db_type(typ)
3012        for col, typ in get_table_cols_types(
3013            pipe.target,
3014            self,
3015            schema=self.get_pipe_schema(pipe),
3016            debug=debug,
3017        ).items()
3018    }
3019    pipe_bool_cols = [col for col, typ in pipe.dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3020    pd_db_df_aliases = {
3021        'int': 'bool',
3022        'float': 'bool',
3023        'numeric': 'bool',
3024        'guid': 'object',
3025    }
3026    if self.flavor == 'oracle':
3027        pd_db_df_aliases['int'] = 'numeric'
3028
3029    altered_cols = {
3030        col: (db_cols_types.get(col, 'object'), typ)
3031        for col, typ in df_cols_types.items()
3032        if not are_dtypes_equal(typ, db_cols_types.get(col, 'object').lower())
3033        and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
3034    }
3035
3036    ### NOTE: Sometimes bools are coerced into ints or floats.
3037    altered_cols_to_ignore = set()
3038    for col, (db_typ, df_typ) in altered_cols.items():
3039        for db_alias, df_alias in pd_db_df_aliases.items():
3040            if db_alias in db_typ.lower() and df_alias in df_typ.lower():
3041                altered_cols_to_ignore.add(col)
3042
3043    ### Oracle's bool handling sometimes mixes NUMBER and INT.
3044    for bool_col in pipe_bool_cols:
3045        if bool_col not in altered_cols:
3046            continue
3047        db_is_bool_compatible = (
3048            are_dtypes_equal('int', altered_cols[bool_col][0])
3049            or are_dtypes_equal('float', altered_cols[bool_col][0])
3050            or are_dtypes_equal('numeric', altered_cols[bool_col][0])
3051            or are_dtypes_equal('bool', altered_cols[bool_col][0])
3052        )
3053        df_is_bool_compatible = (
3054            are_dtypes_equal('int', altered_cols[bool_col][1])
3055            or are_dtypes_equal('float', altered_cols[bool_col][1])
3056            or are_dtypes_equal('numeric', altered_cols[bool_col][1])
3057            or are_dtypes_equal('bool', altered_cols[bool_col][1])
3058        )
3059        if db_is_bool_compatible and df_is_bool_compatible:
3060            altered_cols_to_ignore.add(bool_col)
3061
3062    for col in altered_cols_to_ignore:
3063        _ = altered_cols.pop(col, None)
3064    if not altered_cols:
3065        return []
3066
3067    if numeric_cols:
3068        pipe.dtypes.update({col: 'numeric' for col in numeric_cols})
3069        edit_success, edit_msg = pipe.edit(debug=debug)
3070        if not edit_success:
3071            warn(
3072                f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
3073                + f"{edit_msg}"
3074            )
3075    else:
3076        numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ == 'numeric'])
3077
3078    numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3079    text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
3080    altered_cols_types = {
3081        col: (
3082            numeric_type
3083            if col in numeric_cols
3084            else text_type
3085        )
3086        for col, (db_typ, typ) in altered_cols.items()
3087    }
3088
3089    if self.flavor == 'sqlite':
3090        temp_table_name = '-' + session_id + '_' + target
3091        rename_query = (
3092            "ALTER TABLE "
3093            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3094            + " RENAME TO "
3095            + sql_item_name(temp_table_name, self.flavor, None)
3096        )
3097        create_query = (
3098            "CREATE TABLE "
3099            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3100            + " (\n"
3101        )
3102        for col_name, col_obj in table_obj.columns.items():
3103            create_query += (
3104                sql_item_name(col_name, self.flavor, None)
3105                + " "
3106                + (
3107                    str(col_obj.type)
3108                    if col_name not in altered_cols
3109                    else altered_cols_types[col_name]
3110                )
3111                + ",\n"
3112            )
3113        create_query = create_query[:-2] + "\n)"
3114
3115        insert_query = (
3116            "INSERT INTO "
3117            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3118            + ' ('
3119            + ', '.join([
3120                sql_item_name(col_name, self.flavor, None)
3121                for col_name, _ in table_obj.columns.items()
3122            ])
3123            + ')'
3124            + "\nSELECT\n"
3125        )
3126        for col_name, col_obj in table_obj.columns.items():
3127            new_col_str = (
3128                sql_item_name(col_name, self.flavor, None)
3129                if col_name not in altered_cols
3130                else (
3131                    "CAST("
3132                    + sql_item_name(col_name, self.flavor, None)
3133                    + " AS "
3134                    + altered_cols_types[col_name]
3135                    + ")"
3136                )
3137            )
3138            insert_query += new_col_str + ",\n"
3139        insert_query = insert_query[:-2] + (
3140            f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
3141        )
3142
3143        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
3144
3145        drop_query = f"DROP TABLE {if_exists_str} " + sql_item_name(
3146            temp_table_name, self.flavor, self.get_pipe_schema(pipe)
3147        )
3148        return [
3149            rename_query,
3150            create_query,
3151            insert_query,
3152            drop_query,
3153        ]
3154
3155    queries = []
3156    if self.flavor == 'oracle':
3157        for col, typ in altered_cols_types.items():
3158            add_query = (
3159                "ALTER TABLE "
3160                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3161                + "\nADD " + sql_item_name(col + '_temp', self.flavor, None)
3162                + " " + typ
3163            )
3164            queries.append(add_query)
3165
3166        for col, typ in altered_cols_types.items():
3167            populate_temp_query = (
3168                "UPDATE "
3169                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3170                + "\nSET " + sql_item_name(col + '_temp', self.flavor, None)
3171                + ' = ' + sql_item_name(col, self.flavor, None)
3172            )
3173            queries.append(populate_temp_query)
3174
3175        for col, typ in altered_cols_types.items():
3176            set_old_cols_to_null_query = (
3177                "UPDATE "
3178                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3179                + "\nSET " + sql_item_name(col, self.flavor, None)
3180                + ' = NULL'
3181            )
3182            queries.append(set_old_cols_to_null_query)
3183
3184        for col, typ in altered_cols_types.items():
3185            alter_type_query = (
3186                "ALTER TABLE "
3187                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3188                + "\nMODIFY " + sql_item_name(col, self.flavor, None) + ' '
3189                + typ
3190            )
3191            queries.append(alter_type_query)
3192
3193        for col, typ in altered_cols_types.items():
3194            set_old_to_temp_query = (
3195                "UPDATE "
3196                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3197                + "\nSET " + sql_item_name(col, self.flavor, None)
3198                + ' = ' + sql_item_name(col + '_temp', self.flavor, None)
3199            )
3200            queries.append(set_old_to_temp_query)
3201
3202        for col, typ in altered_cols_types.items():
3203            drop_temp_query = (
3204                "ALTER TABLE "
3205                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3206                + "\nDROP COLUMN " + sql_item_name(col + '_temp', self.flavor, None)
3207            )
3208            queries.append(drop_temp_query)
3209
3210        return queries
3211
3212    query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3213    for col, typ in altered_cols_types.items():
3214        alter_col_prefix = (
3215            'ALTER' if self.flavor not in ('mysql', 'mariadb', 'oracle')
3216            else 'MODIFY'
3217        )
3218        type_prefix = (
3219            '' if self.flavor in ('mssql', 'mariadb', 'mysql')
3220            else 'TYPE '
3221        )
3222        column_str = 'COLUMN' if self.flavor != 'oracle' else ''
3223        query += (
3224            f"\n{alter_col_prefix} {column_str} "
3225            + sql_item_name(col, self.flavor, None)
3226            + " " + type_prefix + typ + ","
3227        )
3228
3229    query = query[:-1]
3230    queries.append(query)
3231    if self.flavor != 'duckdb':
3232        return queries
3233
3234    drop_index_queries = list(flatten_list(
3235        [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()]
3236    ))
3237    create_index_queries = list(flatten_list(
3238        [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()]
3239    ))
3240
3241    return drop_index_queries + queries + create_index_queries

If we encounter a column of a different type, set the entire column to text. If the altered columns are numeric, alter to numeric instead.

Parameters
  • pipe (mrsm.Pipe): The pipe to be altered.
  • df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which may contain altered columns. If a dict is provided, assume it maps columns to Pandas data types.
Returns
  • A list of the ALTER TABLE SQL query or queries to be executed on the provided connector.
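A sketch passing a {column: Pandas dtype} dict for a hypothetical column whose type has drifted:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe('plugin:noaa', 'weather')  # hypothetical pipe

### 'station' is a hypothetical column which now arrives as text.
queries = conn.get_alter_columns_queries(pipe, {'station': 'string'})
for query in queries:
    print(query)
```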
def delete_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
781def delete_pipe(
782    self,
783    pipe: mrsm.Pipe,
784    debug: bool = False,
785) -> SuccessTuple:
786    """
787    Delete a Pipe's registration.
788    """
789    from meerschaum.utils.sql import sql_item_name
790    from meerschaum.utils.debug import dprint
791    from meerschaum.utils.packages import attempt_import
792    sqlalchemy = attempt_import('sqlalchemy')
793
794    if not pipe.id:
795        return False, f"{pipe} is not registered."
796
797    ### ensure pipes table exists
798    from meerschaum.connectors.sql.tables import get_tables
799    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
800
801    q = sqlalchemy.delete(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id)
802    if not self.exec(q, debug=debug):
803        return False, f"Failed to delete registration for {pipe}."
804
805    return True, "Success"

Delete a Pipe's registration.
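A sketch with a hypothetical pipe; note this only removes the registration row, not the pipe's target table:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql')
pipe = mrsm.Pipe('plugin:noaa', 'weather')  # hypothetical registered pipe

success, msg = conn.delete_pipe(pipe)
print(success, msg)
```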

def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, str, NoneType] = None, end: Union[datetime.datetime, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: str = 'asc', limit: Optional[int] = None, begin_add_minutes: int = 0, end_add_minutes: int = 0, debug: bool = False, **kw: Any) -> 'Union[pd.DataFrame, None]':
 808def get_pipe_data(
 809    self,
 810    pipe: mrsm.Pipe,
 811    select_columns: Optional[List[str]] = None,
 812    omit_columns: Optional[List[str]] = None,
 813    begin: Union[datetime, str, None] = None,
 814    end: Union[datetime, str, None] = None,
 815    params: Optional[Dict[str, Any]] = None,
 816    order: str = 'asc',
 817    limit: Optional[int] = None,
 818    begin_add_minutes: int = 0,
 819    end_add_minutes: int = 0,
 820    debug: bool = False,
 821    **kw: Any
 822) -> Union[pd.DataFrame, None]:
 823    """
 824    Access a pipe's data from the SQL instance.
 825
 826    Parameters
 827    ----------
 828    pipe: mrsm.Pipe
 829        The pipe to get data from.
 830
 831    select_columns: Optional[List[str]], default None
 832        If provided, only select these given columns.
 833        Otherwise select all available columns (i.e. `SELECT *`).
 834
 835    omit_columns: Optional[List[str]], default None
 836        If provided, remove these columns from the selection.
 837
 838    begin: Union[datetime, str, None], default None
 839        If provided, get rows newer than or equal to this value.
 840
 841    end: Union[datetime, str, None], default None
 842        If provided, get rows older than or equal to this value.
 843
 844    params: Optional[Dict[str, Any]], default None
 845        Additional parameters to filter by.
 846        See `meerschaum.utils.sql.build_where`.
 847
 848    order: Optional[str], default 'asc'
 849        The selection order for all of the indices in the query.
 850        If `None`, omit the `ORDER BY` clause.
 851
 852    limit: Optional[int], default None
 853        If specified, limit the number of rows retrieved to this value.
 854
 855    begin_add_minutes: int, default 0
 856        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
 857
 858    end_add_minutes: int, default 0
 859        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
 860
 861    chunksize: Optional[int], default -1
 862        The size of dataframe chunks to load into memory.
 863
 864    debug: bool, default False
 865        Verbosity toggle.
 866
 867    Returns
 868    -------
 869    A `pd.DataFrame` of the pipe's data.
 870
 871    """
 872    import json
 873    from meerschaum.utils.sql import sql_item_name
 874    from meerschaum.utils.dataframe import parse_df_datetimes
 875    from meerschaum.utils.packages import import_pandas
 876    from meerschaum.utils.dtypes import (
 877        attempt_cast_to_numeric,
 878        attempt_cast_to_uuid,
 879        are_dtypes_equal, to_pandas_dtype,
 880    )
 881    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 882    pd = import_pandas()
 883    is_dask = 'dask' in pd.__name__
 884
 885    cols_types = pipe.get_columns_types(debug=debug)
 886    dtypes = {
 887        **{
 888            p_col: to_pandas_dtype(p_typ)
 889            for p_col, p_typ in pipe.dtypes.items()
 890        },
 891        **{
 892            col: get_pd_type_from_db_type(typ)
 893            for col, typ in cols_types.items()
 894        }
 895    }
 896    if dtypes:
 897        if self.flavor == 'sqlite':
 898            if not pipe.columns.get('datetime', None):
 899                _dt = pipe.guess_datetime()
 900                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
 901                is_guess = True
 902            else:
 903                _dt = pipe.get_columns('datetime')
 904                dt = sql_item_name(_dt, self.flavor, None)
 905                is_guess = False
 906
 907            if _dt:
 908                dt_type = dtypes.get(_dt, 'object').lower()
 909                if 'datetime' not in dt_type:
 910                    if 'int' not in dt_type:
 911                        dtypes[_dt] = 'datetime64[ns, UTC]'
 912    existing_cols = pipe.get_columns_types(debug=debug)
 913    select_columns = (
 914        [
 915            col
 916            for col in existing_cols
 917            if col not in (omit_columns or [])
 918        ]
 919        if not select_columns
 920        else [
 921            col
 922            for col in select_columns
 923            if col in existing_cols
 924            and col not in (omit_columns or [])
 925        ]
 926    )
 927    if select_columns:
 928        dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
 929    dtypes = {
 930        col: to_pandas_dtype(typ)
 931        for col, typ in dtypes.items()
 932        if col in select_columns and col not in (omit_columns or [])
 933    }
 934    query = self.get_pipe_data_query(
 935        pipe,
 936        select_columns=select_columns,
 937        omit_columns=omit_columns,
 938        begin=begin,
 939        end=end,
 940        params=params,
 941        order=order,
 942        limit=limit,
 943        begin_add_minutes=begin_add_minutes,
 944        end_add_minutes=end_add_minutes,
 945        debug=debug,
 946        **kw
 947    )
 948
 949    if is_dask:
 950        index_col = pipe.columns.get('datetime', None)
 951        kw['index_col'] = index_col
 952
 953    numeric_columns = [
 954        col
 955        for col, typ in pipe.dtypes.items()
 956        if typ == 'numeric' and col in dtypes
 957    ]
 958    uuid_columns = [
 959        col
 960        for col, typ in pipe.dtypes.items()
 961        if typ == 'uuid' and col in dtypes
 962    ]
 963
 964    kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
 965
 966    df = self.read(
 967        query,
 968        dtype=dtypes,
 969        debug=debug,
 970        **kw
 971    )
 972    for col in numeric_columns:
 973        if col not in df.columns:
 974            continue
 975        df[col] = df[col].apply(attempt_cast_to_numeric)
 976
 977    for col in uuid_columns:
 978        if col not in df.columns:
 979            continue
 980        df[col] = df[col].apply(attempt_cast_to_uuid)
 981
 982    if self.flavor == 'sqlite':
 983        ignore_dt_cols = [
 984            col
 985            for col, dtype in pipe.dtypes.items()
 986            if not are_dtypes_equal(str(dtype), 'datetime')
 987        ]
 988        ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
 989        df = (
 990            parse_df_datetimes(
 991                df,
 992                ignore_cols=ignore_dt_cols,
 993                chunksize=kw.get('chunksize', None),
 994                strip_timezone=(pipe.tzinfo is None),
 995                debug=debug,
 996            ) if isinstance(df, pd.DataFrame) else (
 997                [
 998                    parse_df_datetimes(
 999                        c,
1000                        ignore_cols=ignore_dt_cols,
1001                        chunksize=kw.get('chunksize', None),
1002                        strip_timezone=(pipe.tzinfo is None),
1003                        debug=debug,
1004                    )
1005                    for c in df
1006                ]
1007            )
1008        )
1009        for col, typ in dtypes.items():
1010            if typ != 'json':
1011                continue
1012            df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
1013    return df

Access a pipe's data from the SQL instance.

Parameters
  • pipe (mrsm.Pipe): The pipe to get data from.
  • select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
  • begin (Union[datetime, str, None], default None): If provided, get rows newer than or equal to this value.
  • end (Union[datetime, str, None], default None): If provided, get rows older than or equal to this value.
  • params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See meerschaum.utils.sql.build_where.
  • order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If None, omit the ORDER BY clause.
  • limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
  • begin_add_minutes (int, default 0): The number of minutes to add to the begin datetime (i.e. DATEADD).
  • end_add_minutes (int, default 0): The number of minutes to add to the end datetime (i.e. DATEADD).
  • chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A pd.DataFrame of the pipe's data.
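
For orientation, here is a minimal usage sketch; the connector keys, pipe keys, and the station filter are hypothetical examples:

```
>>> import meerschaum as mrsm
>>> from datetime import datetime
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance=conn)
>>> df = conn.get_pipe_data(
...     pipe,
...     begin=datetime(2024, 1, 1),
...     end=datetime(2024, 2, 1),
...     params={'station': 'KATL'},
...     limit=100,
... )
```
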
def get_pipe_data_query( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, int, str, NoneType] = None, end: Union[datetime.datetime, int, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: Optional[str] = 'asc', sort_datetimes: bool = False, limit: Optional[int] = None, begin_add_minutes: int = 0, end_add_minutes: int = 0, replace_nulls: Optional[str] = None, skip_existing_cols_check: bool = False, debug: bool = False, **kw: Any) -> Optional[str]:
1016def get_pipe_data_query(
1017    self,
1018    pipe: mrsm.Pipe,
1019    select_columns: Optional[List[str]] = None,
1020    omit_columns: Optional[List[str]] = None,
1021    begin: Union[datetime, int, str, None] = None,
1022    end: Union[datetime, int, str, None] = None,
1023    params: Optional[Dict[str, Any]] = None,
1024    order: Optional[str] = 'asc',
1025    sort_datetimes: bool = False,
1026    limit: Optional[int] = None,
1027    begin_add_minutes: int = 0,
1028    end_add_minutes: int = 0,
1029    replace_nulls: Optional[str] = None,
1030    skip_existing_cols_check: bool = False,
1031    debug: bool = False,
1032    **kw: Any
1033) -> Union[str, None]:
1034    """
1035    Return the `SELECT` query for retrieving a pipe's data from its instance.
1036
1037    Parameters
1038    ----------
1039    pipe: mrsm.Pipe
1040        The pipe to get data from.
1041
1042    select_columns: Optional[List[str]], default None
1043        If provided, only select these given columns.
1044        Otherwise select all available columns (i.e. `SELECT *`).
1045
1046    omit_columns: Optional[List[str]], default None
1047        If provided, remove these columns from the selection.
1048
1049    begin: Union[datetime, int, str, None], default None
1050        If provided, get rows newer than or equal to this value.
1051
1052    end: Union[datetime, int, str, None], default None
1053        If provided, get rows older than or equal to this value.
1054
1055    params: Optional[Dict[str, Any]], default None
1056        Additional parameters to filter by.
1057        See `meerschaum.utils.sql.build_where`.
1058
1059    order: Optional[str], default 'asc'
1060        The selection order for all of the indices in the query.
1061        If `None`, omit the `ORDER BY` clause.
1062
1063    sort_datetimes: bool, default False
1064        Alias for `order='desc'` (only applied when `order` is `None`).
1065
1066    limit: Optional[int], default None
1067        If specified, limit the number of rows retrieved to this value.
1068
1069    begin_add_minutes: int, default 0
1070        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1071
1072    end_add_minutes: int, default 0
1073        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1074
1078    replace_nulls: Optional[str], default None
1079        If provided, replace null values with this value.
1080
1081    skip_existing_cols_check: bool, default False
1082        If `True`, do not verify that querying columns are actually on the table.
1083
1084    debug: bool, default False
1085        Verbosity toggle.
1086
1087    Returns
1088    -------
1089    A `SELECT` query to retrieve a pipe's data.
1090    """
1091    from meerschaum.utils.misc import items_str
1092    from meerschaum.utils.sql import sql_item_name, dateadd_str
1093    from meerschaum.utils.dtypes import coerce_timezone
1094    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
1095
1096    dt_col = pipe.columns.get('datetime', None)
1097    existing_cols = pipe.get_columns_types(debug=debug)
1098    dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
1099    select_columns = (
1100        [col for col in existing_cols]
1101        if not select_columns
1102        else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
1103    )
1104    if omit_columns:
1105        select_columns = [col for col in select_columns if col not in omit_columns]
1106
1107    if order is None and sort_datetimes:
1108        order = 'desc'
1109
1110    if begin == '':
1111        begin = pipe.get_sync_time(debug=debug)
1112        backtrack_interval = pipe.get_backtrack_interval(debug=debug)
1113        if begin is not None:
1114            begin -= backtrack_interval
1115
1116    begin, end = pipe.parse_date_bounds(begin, end)
1117    if isinstance(begin, datetime) and dt_typ:
1118        begin = coerce_timezone(begin, strip_utc=('utc' not in dt_typ.lower()))
1119    if isinstance(end, datetime) and dt_typ:
1120        end = coerce_timezone(end, strip_utc=('utc' not in dt_typ.lower()))
1121
1122    cols_names = [
1123        sql_item_name(col, self.flavor, None)
1124        for col in select_columns
1125    ]
1126    select_cols_str = (
1127        'SELECT\n    '
1128        + ',\n    '.join(
1129            [
1130                (
1131                    col_name
1132                    if not replace_nulls
1133                    else f"COALESCE({col_name}, '{replace_nulls}') AS {col_name}"
1134                )
1135                for col_name in cols_names
1136            ]
1137        )
1138    ) if cols_names else 'SELECT *'
1139    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1140    query = f"{select_cols_str}\nFROM {pipe_table_name}"
1141    where = ""
1142
1143    if order is not None:
1144        default_order = 'asc'
1145        if order not in ('asc', 'desc'):
1146            warn(f"Ignoring unsupported order '{order}'. Falling back to '{default_order}'.")
1147            order = default_order
1148        order = order.upper()
1149
1150    if not pipe.columns.get('datetime', None):
1151        _dt = pipe.guess_datetime()
1152        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
1153        is_guess = True
1154    else:
1155        _dt = pipe.get_columns('datetime')
1156        dt = sql_item_name(_dt, self.flavor, None)
1157        is_guess = False
1158
1159    quoted_indices = {
1160        key: sql_item_name(val, self.flavor, None)
1161        for key, val in pipe.columns.items()
1162        if val in existing_cols or skip_existing_cols_check
1163    }
1164
1165    if begin is not None or end is not None:
1166        if is_guess:
1167            if _dt is None:
1168                warn(
1169                    f"No datetime could be determined for {pipe}."
1170                    + "\n    Ignoring begin and end...",
1171                    stack=False,
1172                )
1173                begin, end = None, None
1174            else:
1175                warn(
1176                    f"A datetime wasn't specified for {pipe}.\n"
1177                    + f"    Using column \"{_dt}\" for datetime bounds...",
1178                    stack=False,
1179                )
1180
1181    is_dt_bound = False
1182    if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
1183        begin_da = dateadd_str(
1184            flavor=self.flavor,
1185            datepart='minute',
1186            number=begin_add_minutes,
1187            begin=begin,
1188        )
1189        where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
1190        is_dt_bound = True
1191
1192    if end is not None and (_dt in existing_cols or skip_existing_cols_check):
1193        if 'int' in str(type(end)).lower() and end == begin:
1194            end += 1
1195        end_da = dateadd_str(
1196            flavor=self.flavor,
1197            datepart='minute',
1198            number=end_add_minutes,
1199            begin=end
1200        )
1201        where += f"{dt} < {end_da}"
1202        is_dt_bound = True
1203
1204    if params is not None:
1205        from meerschaum.utils.sql import build_where
1206        valid_params = {
1207            k: v
1208            for k, v in params.items()
1209            if k in existing_cols or skip_existing_cols_check
1210        }
1211        if valid_params:
1212            where += build_where(valid_params, self).replace(
1213                'WHERE', ('AND' if is_dt_bound else "")
1214            )
1215
1216    if len(where) > 0:
1217        query += "\nWHERE " + where
1218
1219    if order is not None:
1220        ### Sort by indices, starting with datetime.
1221        order_by = ""
1222        if quoted_indices:
1223            order_by += "\nORDER BY "
1224            if _dt and (_dt in existing_cols or skip_existing_cols_check):
1225                order_by += dt + ' ' + order + ','
1226            for key, quoted_col_name in quoted_indices.items():
1227                if dt == quoted_col_name:
1228                    continue
1229                order_by += ' ' + quoted_col_name + ' ' + order + ','
1230            order_by = order_by[:-1]
1231
1232        query += order_by
1233
1234    if isinstance(limit, int):
1235        if self.flavor == 'mssql':
1236            query = f'SELECT TOP {limit}\n' + query[len("SELECT "):]
1237        elif self.flavor == 'oracle':
1238            query = (
1239                f"SELECT * FROM (\n  {query}\n)\n"
1240                + f"WHERE ROWNUM <= {limit}"
1241            )
1242        else:
1243            query += f"\nLIMIT {limit}"
1244
1245    if debug:
1246        to_print = (
1247            []
1248            + ([f"begin='{begin}'"] if begin else [])
1249            + ([f"end='{end}'"] if end else [])
1250            + ([f"params={params}"] if params else [])
1251        )
1252        dprint("Getting pipe data with constraints: " + items_str(to_print, quotes=False))
1253
1254    return query

Return the SELECT query for retrieving a pipe's data from its instance.

Parameters
  • pipe (mrsm.Pipe): The pipe to get data from.
  • select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
  • begin (Union[datetime, int, str, None], default None): If provided, get rows newer than or equal to this value.
  • end (Union[datetime, int, str, None], default None): If provided, get rows older than or equal to this value.
  • params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See meerschaum.utils.sql.build_where.
  • order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If None, omit the ORDER BY clause.
  • sort_datetimes (bool, default False): Alias for order='desc' (only applied when order is None).
  • limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
  • begin_add_minutes (int, default 0): The number of minutes to add to the begin datetime (i.e. DATEADD).
  • end_add_minutes (int, default 0): The number of minutes to add to the end datetime (i.e. DATEADD).
  • replace_nulls (Optional[str], default None): If provided, replace null values with this value.
  • skip_existing_cols_check (bool, default False): If True, do not verify that querying columns are actually on the table.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SELECT query to retrieve a pipe's data.
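
A short sketch of building (without executing) the SELECT statement for the hypothetical pipe above; string bounds are parsed via pipe.parse_date_bounds, and the exact SQL depends on the connector's flavor:

```
>>> query = conn.get_pipe_data_query(
...     pipe,
...     begin='2024-01-01',
...     end='2024-02-01',
...     order='desc',
...     limit=10,
... )
>>> print(query)  # a flavor-specific SELECT ... WHERE ... ORDER BY ... statement
```
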
def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
19def register_pipe(
20    self,
21    pipe: mrsm.Pipe,
22    debug: bool = False,
23) -> SuccessTuple:
24    """
25    Register a new pipe.
26    A pipe's attributes must be set before registering.
27    """
28    from meerschaum.utils.debug import dprint
29    from meerschaum.utils.packages import attempt_import
30    from meerschaum.utils.sql import json_flavors
31
32    ### ensure pipes table exists
33    from meerschaum.connectors.sql.tables import get_tables
34    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
35
36    if pipe.get_id(debug=debug) is not None:
37        return False, f"{pipe} is already registered."
38
39    ### NOTE: if `parameters` is supplied in the Pipe constructor,
40    ###       then `pipe.parameters` will exist and not be fetched from the database.
41
42    ### 1. Prioritize the Pipe object's `parameters` first.
43    ###    E.g. if the user manually sets the `parameters` property
44    ###    or if the Pipe already exists
45    ###    (which shouldn't be able to be registered anyway but that's an issue for later).
46    parameters = None
47    try:
48        parameters = pipe.parameters
49    except Exception as e:
50        if debug:
51            dprint(str(e))
52        parameters = None
53
54    ### ensure `parameters` is a dictionary
55    if parameters is None:
56        parameters = {}
57
58    import json
59    sqlalchemy = attempt_import('sqlalchemy')
60    values = {
61        'connector_keys' : pipe.connector_keys,
62        'metric_key'     : pipe.metric_key,
63        'location_key'   : pipe.location_key,
64        'parameters'     : (
65            json.dumps(parameters)
66            if self.flavor not in json_flavors
67            else parameters
68        ),
69    }
70    query = sqlalchemy.insert(pipes_tbl).values(**values)
71    result = self.exec(query, debug=debug)
72    if result is None:
73        return False, f"Failed to register {pipe}."
74    return True, f"Successfully registered {pipe}."

Register a new pipe. A pipe's attributes must be set before registering.
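
A minimal sketch, assuming a hypothetical pipe whose attributes (here its index columns) are set before registration:

```
>>> pipe = mrsm.Pipe(
...     'sql:main', 'temperature', 'office',
...     instance=conn,
...     columns={'datetime': 'dt', 'id': 'station_id'},
... )
>>> success, msg = conn.register_pipe(pipe)
```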

def edit_pipe( self, pipe: meerschaum.Pipe = None, patch: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 77def edit_pipe(
 78    self,
 79    pipe: mrsm.Pipe = None,
 80    patch: bool = False,
 81    debug: bool = False,
 82    **kw: Any
 83) -> SuccessTuple:
 84    """
 85    Persist a Pipe's parameters to its database.
 86
 87    Parameters
 88    ----------
 89    pipe: mrsm.Pipe, default None
 90        The pipe to be edited.
 91    patch: bool, default False
 92        If patch is `True`, update the existing parameters by cascading.
 93        Otherwise overwrite the parameters (default).
 94    debug: bool, default False
 95        Verbosity toggle.
 96    """
 97
 98    if pipe.id is None:
 99        return False, f"{pipe} is not registered and cannot be edited."
100
101    from meerschaum.utils.debug import dprint
102    from meerschaum.utils.packages import attempt_import
103    from meerschaum.utils.sql import json_flavors
104    if not patch:
105        parameters = pipe.__dict__.get('_attributes', {}).get('parameters', {})
106    else:
107        from meerschaum import Pipe
108        from meerschaum.config._patch import apply_patch_to_config
109        original_parameters = Pipe(
110            pipe.connector_keys, pipe.metric_key, pipe.location_key,
111            mrsm_instance=pipe.instance_keys
112        ).parameters
113        parameters = apply_patch_to_config(
114            original_parameters,
115            pipe.parameters
116        )
117
118    ### ensure pipes table exists
119    from meerschaum.connectors.sql.tables import get_tables
120    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
121
122    import json
123    sqlalchemy = attempt_import('sqlalchemy')
124
125    values = {
126        'parameters': (
127            json.dumps(parameters)
128            if self.flavor not in json_flavors
129            else parameters
130        ),
131    }
132    q = sqlalchemy.update(pipes_tbl).values(**values).where(
133        pipes_tbl.c.pipe_id == pipe.id
134    )
135
136    result = self.exec(q, debug=debug)
137    message = (
138        f"Successfully edited {pipe}."
139        if result is not None else f"Failed to edit {pipe}."
140    )
141    return (result is not None), message

Persist a Pipe's parameters to its database.

Parameters
  • pipe (mrsm.Pipe, default None): The pipe to be edited.
  • patch (bool, default False): If patch is True, update the existing parameters by cascading. Otherwise overwrite the parameters (default).
  • debug (bool, default False): Verbosity toggle.
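
A short sketch of both modes; the fetch parameters shown are hypothetical:

```
>>> pipe.parameters['fetch'] = {'backtrack_minutes': 1440}
>>> success, msg = conn.edit_pipe(pipe)              # overwrite parameters
>>> success, msg = conn.edit_pipe(pipe, patch=True)  # cascade into existing parameters
```
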
def get_pipe_id(self, pipe: meerschaum.Pipe, debug: bool = False) -> Any:
1257def get_pipe_id(
1258    self,
1259    pipe: mrsm.Pipe,
1260    debug: bool = False,
1261) -> Any:
1262    """
1263    Get a Pipe's ID from the pipes table.
1264    """
1265    if pipe.temporary:
1266        return None
1267    from meerschaum.utils.packages import attempt_import
1269    sqlalchemy = attempt_import('sqlalchemy')
1270    from meerschaum.connectors.sql.tables import get_tables
1271    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
1272
1273    query = sqlalchemy.select(pipes_tbl.c.pipe_id).where(
1274        pipes_tbl.c.connector_keys == pipe.connector_keys
1275    ).where(
1276        pipes_tbl.c.metric_key == pipe.metric_key
1277    ).where(
1278        (pipes_tbl.c.location_key == pipe.location_key) if pipe.location_key is not None
1279        else pipes_tbl.c.location_key.is_(None)
1280    )
1281    _id = self.value(query, debug=debug, silent=pipe.temporary)
1282    if _id is not None:
1283        _id = int(_id)
1284    return _id

Get a Pipe's ID from the pipes table.
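
For example (a sketch; the actual ID depends on registration order):

```
>>> conn.get_pipe_id(pipe)  # None if the pipe is temporary or unregistered
1
```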

def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, Any]:
1287def get_pipe_attributes(
1288    self,
1289    pipe: mrsm.Pipe,
1290    debug: bool = False,
1291) -> Dict[str, Any]:
1292    """
1293    Get a Pipe's attributes dictionary.
1294    """
1295    from meerschaum.connectors.sql.tables import get_tables
1296    from meerschaum.utils.packages import attempt_import
1297    sqlalchemy = attempt_import('sqlalchemy')
1298
1299    if pipe.get_id(debug=debug) is None:
1300        return {}
1301
1302    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
1303
1304    try:
1305        q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id)
1306        if debug:
1307            dprint(q)
1308        attributes = (
1309            dict(self.exec(q, silent=True, debug=debug).first()._mapping)
1310            if self.flavor != 'duckdb'
1311            else self.read(q, debug=debug).to_dict(orient='records')[0]
1312        )
1313    except Exception as e:
1314        import traceback
1315        traceback.print_exc()
1316        warn(e)
1317        print(pipe)
1318        return {}
1319
1320    ### handle non-PostgreSQL databases (text vs JSON)
1321    if not isinstance(attributes.get('parameters', None), dict):
1322        try:
1323            import json
1324            parameters = json.loads(attributes['parameters'])
1325            if isinstance(parameters, str) and parameters.startswith('{'):
1326                parameters = json.loads(parameters)
1327            attributes['parameters'] = parameters
1328        except Exception as e:
1329            attributes['parameters'] = {}
1330
1331    return attributes

Get a Pipe's attributes dictionary.
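
For example, reading back the parameters persisted at registration (continuing the hypothetical pipe above):

```
>>> attrs = conn.get_pipe_attributes(pipe)
>>> attrs.get('parameters', {}).get('columns')
{'datetime': 'dt', 'id': 'station_id'}
```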

def sync_pipe( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, str, Dict[Any, Any], None]' = None, begin: Optional[datetime.datetime] = None, end: Optional[datetime.datetime] = None, chunksize: Optional[int] = -1, check_existing: bool = True, blocking: bool = True, debug: bool = False, _check_temporary_tables: bool = True, **kw: Any) -> Tuple[bool, str]:
1398def sync_pipe(
1399    self,
1400    pipe: mrsm.Pipe,
1401    df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
1402    begin: Optional[datetime] = None,
1403    end: Optional[datetime] = None,
1404    chunksize: Optional[int] = -1,
1405    check_existing: bool = True,
1406    blocking: bool = True,
1407    debug: bool = False,
1408    _check_temporary_tables: bool = True,
1409    **kw: Any
1410) -> SuccessTuple:
1411    """
1412    Sync a pipe using a database connection.
1413
1414    Parameters
1415    ----------
1416    pipe: mrsm.Pipe
1417        The Meerschaum Pipe instance into which to sync the data.
1418
1419    df: Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]
1420        An optional DataFrame or equivalent to sync into the pipe.
1421        Defaults to `None`.
1422
1423    begin: Optional[datetime], default None
1424        Optionally specify the earliest datetime to search for data.
1425        Defaults to `None`.
1426
1427    end: Optional[datetime], default None
1428        Optionally specify the latest datetime to search for data.
1429        Defaults to `None`.
1430
1431    chunksize: Optional[int], default -1
1432        Specify the number of rows to sync per chunk.
1433        If `-1`, resort to system configuration (default is `900`).
1434        A `chunksize` of `None` will sync all rows in one transaction.
1435        Defaults to `-1`.
1436
1437    check_existing: bool, default True
1438        If `True`, pull and diff with existing data from the pipe. Defaults to `True`.
1439
1440    blocking: bool, default True
1441        If `True`, wait for the sync to finish and return its result; otherwise sync asynchronously.
1442        Defaults to `True`.
1443
1444    debug: bool, default False
1445        Verbosity toggle. Defaults to False.
1446
1447    kw: Any
1448        Catch-all for keyword arguments.
1449
1450    Returns
1451    -------
1452    A `SuccessTuple` of success (`bool`) and message (`str`).
1453    """
1454    from meerschaum.utils.packages import import_pandas
1455    from meerschaum.utils.sql import (
1456        get_update_queries,
1457        sql_item_name,
1458        update_queries,
1459        get_create_table_queries,
1460        get_reset_autoincrement_queries,
1461    )
1462    from meerschaum.utils.misc import generate_password
1463    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
1464    from meerschaum.utils.dtypes import are_dtypes_equal
1465    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1466    from meerschaum import Pipe
1467    import time
1468    import copy
1469    pd = import_pandas()
1470    if df is None:
1471        msg = f"DataFrame is None. Cannot sync {pipe}."
1472        warn(msg)
1473        return False, msg
1474
1475    start = time.perf_counter()
1476    pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
1477
1478    if not pipe.temporary and not pipe.get_id(debug=debug):
1479        register_tuple = pipe.register(debug=debug)
1480        if not register_tuple[0]:
1481            return register_tuple
1482
1483    ### df is the dataframe returned from the remote source
1484    ### via the connector
1485    if debug:
1486        dprint("Fetched data:\n" + str(df))
1487
1488    if not isinstance(df, pd.DataFrame):
1489        df = pipe.enforce_dtypes(
1490            df,
1491            chunksize=chunksize,
1492            safe_copy=kw.get('safe_copy', False),
1493            debug=debug,
1494        )
1495
1496    ### if table does not exist, create it with indices
1497    is_new = False
1498    if not pipe.exists(debug=debug):
1499        check_existing = False
1500        is_new = True
1501    else:
1502        ### Check for new columns.
1503        add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
1504        if add_cols_queries:
1505            _ = pipe.__dict__.pop('_columns_indices', None)
1506            _ = pipe.__dict__.pop('_columns_types', None)
1507            if not self.exec_queries(add_cols_queries, debug=debug):
1508                warn(f"Failed to add new columns to {pipe}.")
1509
1510        alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
1511        if alter_cols_queries:
1512            _ = pipe.__dict__.pop('_columns_indices', None)
1513            _ = pipe.__dict__.pop('_columns_types', None)
1514            if not self.exec_queries(alter_cols_queries, debug=debug):
1515                warn(f"Failed to alter columns for {pipe}.")
1516            else:
1517                _ = pipe.infer_dtypes(persist=True)
1518
1519    ### NOTE: Oracle SQL < 23c (2023) and SQLite do not support booleans,
1520    ### so infer bools and persist them to `dtypes`.
1521    if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
1522        pipe_dtypes = pipe.dtypes
1523        new_bool_cols = {
1524            col: 'bool[pyarrow]'
1525            for col, typ in df.dtypes.items()
1526            if col not in pipe_dtypes
1527            and are_dtypes_equal(str(typ), 'bool')
1528        }
1529        pipe_dtypes.update(new_bool_cols)
1530        pipe.dtypes = pipe_dtypes
1531        if new_bool_cols and not pipe.temporary:
1532            infer_bool_success, infer_bool_msg = pipe.edit(debug=debug)
1533            if not infer_bool_success:
1534                return infer_bool_success, infer_bool_msg
1535
1536    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
1537    if upsert:
1538        check_existing = False
1539    kw['safe_copy'] = kw.get('safe_copy', False)
1540
1541    unseen_df, update_df, delta_df = (
1542        pipe.filter_existing(
1543            df,
1544            chunksize=chunksize,
1545            debug=debug,
1546            **kw
1547        ) if check_existing else (df, None, df)
1548    )
1549    if upsert:
1550        unseen_df, update_df, delta_df = (df.head(0), df, df)
1551
1552    if debug:
1553        dprint("Delta data:\n" + str(delta_df))
1554        dprint("Unseen data:\n" + str(unseen_df))
1555        if update_df is not None:
1556            dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df))
1557
1558    if_exists = kw.get('if_exists', 'append')
1559    if 'if_exists' in kw:
1560        kw.pop('if_exists')
1561    if 'name' in kw:
1562        kw.pop('name')
1563
1564    ### Insert new data into Pipe's table.
1565    unseen_kw = copy.deepcopy(kw)
1566    unseen_kw.update({
1567        'name': pipe.target,
1568        'if_exists': if_exists,
1569        'debug': debug,
1570        'as_dict': True,
1571        'chunksize': chunksize,
1572        'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
1573        'schema': self.get_pipe_schema(pipe),
1574    })
1575
1576    primary_key = pipe.columns.get('primary', None)
1577    autoincrement = (
1578        pipe.parameters.get('autoincrement', False)
1579        or (
1580            is_new
1581            and primary_key
1582            and primary_key not in pipe.dtypes
1584            and primary_key not in unseen_df.columns
1585        )
1586    )
1587    if autoincrement and 'autoincrement' not in pipe.parameters:
1588        pipe.parameters['autoincrement'] = autoincrement
1589        edit_success, edit_msg = pipe.edit(debug=debug)
1590        if not edit_success:
1591            return edit_success, edit_msg
1592
1593    autoincrement_needs_reset = False
1594    if autoincrement and primary_key:
1595        if primary_key not in df.columns:
1596            if unseen_df is not None and primary_key in unseen_df.columns:
1597                del unseen_df[primary_key]
1598            if update_df is not None and primary_key in update_df.columns:
1599                del update_df[primary_key]
1600            if delta_df is not None and primary_key in delta_df.columns:
1601                del delta_df[primary_key]
1602        elif unseen_df[primary_key].notnull().any():
1603            autoincrement_needs_reset = True
1604
1605    if is_new:
1606        create_success, create_msg = self.create_pipe_table_from_df(
1607            pipe,
1608            unseen_df,
1609            debug=debug,
1610        )
1611        if not create_success:
1612            return create_success, create_msg
1613
1614    do_identity_insert = bool(
1615        self.flavor in ('mssql',)
1616        and primary_key in unseen_df.columns
1617        and autoincrement
1618    )
1619    with self.engine.connect() as connection:
1620        with connection.begin():
1621            if do_identity_insert:
1622                identity_on_result = self.exec(
1623                    f"SET IDENTITY_INSERT {pipe_name} ON",
1624                    commit=False,
1625                    _connection=connection,
1626                    close=False,
1627                    debug=debug,
1628                )
1629                if identity_on_result is None:
1630                    return False, f"Could not enable identity inserts on {pipe}."
1631
1632            stats = self.to_sql(
1633                unseen_df,
1634                _connection=connection,
1635                **unseen_kw
1636            )
1637
1638            if do_identity_insert:
1639                identity_off_result = self.exec(
1640                    f"SET IDENTITY_INSERT {pipe_name} OFF",
1641                    commit=False,
1642                    _connection=connection,
1643                    close=False,
1644                    debug=debug,
1645                )
1646                if identity_off_result is None:
1647                    return False, f"Could not disable identity inserts on {pipe}."
1648
1649    if is_new:
1650        if not self.create_indices(pipe, debug=debug):
1651            warn(f"Failed to create indices for {pipe}. Continuing...")
1652
1653    if autoincrement_needs_reset:
1654        reset_autoincrement_queries = get_reset_autoincrement_queries(
1655            pipe.target,
1656            primary_key,
1657            self,
1658            schema=self.get_pipe_schema(pipe),
1659            debug=debug,
1660        )
1661        results = self.exec_queries(reset_autoincrement_queries, debug=debug)
1662        for result in results:
1663            if result is None:
1664                warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
1665
1666    if update_df is not None and len(update_df) > 0:
1667        transact_id = generate_password(3)
1668        temp_prefix = '##' if self.flavor != 'oracle' else '_'
1669        temp_target = temp_prefix + transact_id + '_' + pipe.target
1670        self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1671        temp_pipe = Pipe(
1672            pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
1673            instance=pipe.instance_keys,
1674            columns={
1675                (ix_key if ix_key != 'primary' else 'primary_'): ix
1676                for ix_key, ix in pipe.columns.items()
1677                if ix and ix in update_df.columns
1678            },
1679            dtypes={
1680                col: typ
1681                for col, typ in pipe.dtypes.items()
1682                if col in update_df.columns
1683            },
1684            target=temp_target,
1685            temporary=True,
1686            parameters={
1687                'static': True,
1688                'schema': self.internal_schema,
1689                'hypertable': False,
1690                'autoincrement': False,
1691            },
1692        )
1693        temp_pipe.__dict__['_columns_types'] = {
1694            col: get_db_type_from_pd_type(
1695                pipe.dtypes.get(col, str(typ)),
1696                self.flavor,
1697            )
1698            for col, typ in update_df.dtypes.items()
1699        }
1700        now_ts = time.perf_counter()
1701        temp_pipe.__dict__['_columns_types_timestamp'] = now_ts
1702        temp_pipe.__dict__['_skip_check_indices'] = True
1703        temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
1704        if not temp_success:
1705            return temp_success, temp_msg
1706        existing_cols = pipe.get_columns_types(debug=debug)
1707        join_cols = [
1708            col
1709            for col_key, col in pipe.columns.items()
1710            if col and col in existing_cols
1711        ]
1712        update_queries = get_update_queries(
1713            pipe.target,
1714            temp_target,
1715            self,
1716            join_cols,
1717            upsert=upsert,
1718            schema=self.get_pipe_schema(pipe),
1719            patch_schema=self.internal_schema,
1720            datetime_col=pipe.columns.get('datetime', None),
1721            debug=debug,
1722        )
1723        update_success = all(
1724            self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
1725        )
1726        self._log_temporary_tables_creation(
1727            temp_target,
1728            ready_to_drop=True,
1729            create=(not pipe.temporary),
1730            debug=debug,
1731        )
1732        if not update_success:
1733            warn(f"Failed to apply update to {pipe}.")
1734
1735    stop = time.perf_counter()
1736    success = stats['success']
1737    if not success:
1738        return success, stats['msg']
1739
1740    unseen_count = len(unseen_df.index) if unseen_df is not None else 0
1741    update_count = len(update_df.index) if update_df is not None else 0
1742    msg = (
1743        (
1744            f"Inserted {unseen_count}, "
1745            + f"updated {update_count} rows."
1746        )
1747        if not upsert
1748        else (
1749            f"Upserted {update_count} row"
1750            + ('s' if update_count != 1 else '')
1751            + "."
1752        )
1753    )
1754    if debug:
1755        msg = msg[:-1] + (
1756            f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n"
1757            + f"in {round(stop - start, 2)} seconds."
1758        )
1759
1760    if _check_temporary_tables:
1761        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
1762            refresh=False, debug=debug
1763        )
1764        if not drop_stale_success:
1765            warn(drop_stale_msg)
1766
1767    return success, msg

Sync a pipe using a database connection.

Parameters
  • pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
  • df (Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]): An optional DataFrame or equivalent to sync into the pipe. Defaults to None.
  • begin (Optional[datetime], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Optional[datetime], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe. Defaults to True.
  • blocking (bool, default True): If True, wait for the sync to finish and return its result; otherwise sync asynchronously. Defaults to True.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
  • kw (Any): Catch-all for keyword arguments.
Returns
  • A SuccessTuple of success (bool) and message (str).
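
A minimal sketch with a hypothetical DataFrame; in typical usage, pipe.sync(df) delegates to the instance connector's sync_pipe:

```
>>> import pandas as pd
>>> df = pd.DataFrame({
...     'dt': [pd.Timestamp('2024-01-01'), pd.Timestamp('2024-01-02')],
...     'station_id': [1, 1],
...     'temperature': [20.5, 21.0],
... })
>>> success, msg = conn.sync_pipe(pipe, df, chunksize=1000)
>>> msg
'Inserted 2, updated 0 rows.'
```
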
def sync_pipe_inplace( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
1770def sync_pipe_inplace(
1771    self,
1772    pipe: mrsm.Pipe,
1773    params: Optional[Dict[str, Any]] = None,
1774    begin: Union[datetime, int, None] = None,
1775    end: Union[datetime, int, None] = None,
1776    chunksize: Optional[int] = -1,
1777    check_existing: bool = True,
1778    debug: bool = False,
1779    **kw: Any
1780) -> SuccessTuple:
1781    """
1782    If a pipe's connector is the same as its instance connector,
1783    it's more efficient to sync the pipe in-place rather than reading data into Pandas.
1784
1785    Parameters
1786    ----------
1787    pipe: mrsm.Pipe
1788        The pipe whose connector is the same as its instance.
1789
1790    params: Optional[Dict[str, Any]], default None
1791        Optional params dictionary to build the `WHERE` clause.
1792        See `meerschaum.utils.sql.build_where`.
1793
1794    begin: Union[datetime, int, None], default None
1795        Optionally specify the earliest datetime to search for data.
1796        Defaults to `None`.
1797
1798    end: Union[datetime, int, None], default None
1799        Optionally specify the latest datetime to search for data.
1800        Defaults to `None`.
1801
1802    chunksize: Optional[int], default -1
1803        Specify the number of rows to sync per chunk.
1804        If `-1`, resort to system configuration (default is `900`).
1805        A `chunksize` of `None` will sync all rows in one transaction.
1806        Defaults to `-1`.
1807
1808    check_existing: bool, default True
1809        If `True`, pull and diff with existing data from the pipe.
1810
1811    debug: bool, default False
1812        Verbosity toggle.
1813
1814    Returns
1815    -------
1816    A SuccessTuple.
1817    """
1818    if self.flavor == 'duckdb':
1819        return pipe.sync(
1820            params=params,
1821            begin=begin,
1822            end=end,
1823            chunksize=chunksize,
1824            check_existing=check_existing,
1825            debug=debug,
1826            _inplace=False,
1827            **kw
1828        )
1829    from meerschaum.utils.sql import (
1830        sql_item_name,
1831        get_update_queries,
1832        get_null_replacement,
1833        get_create_table_queries,
1834        get_table_cols_types,
1835        session_execute,
1836        update_queries,
1837    )
1838    from meerschaum.utils.dtypes import are_dtypes_equal
1839    from meerschaum.utils.dtypes.sql import (
1840        get_pd_type_from_db_type,
1841    )
1842    from meerschaum.utils.misc import generate_password
1843
1844    transact_id = generate_password(3)
1845    def get_temp_table_name(label: str) -> str:
1846        temp_prefix = '##' if self.flavor != 'oracle' else '_'
1847        return temp_prefix + transact_id + '_' + label + '_' + pipe.target
1848
1849    internal_schema = self.internal_schema
1850    temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
1851    temp_tables = {
1852        table_root: get_temp_table_name(table_root)
1853        for table_root in temp_table_roots
1854    }
1855    temp_table_names = {
1856        table_root: sql_item_name(
1857            table_name_raw,
1858            self.flavor,
1859            internal_schema,
1860        )
1861        for table_root, table_name_raw in temp_tables.items()
1862    }
1863    metadef = self.get_pipe_metadef(
1864        pipe,
1865        params=params,
1866        begin=begin,
1867        end=end,
1868        check_existing=check_existing,
1869        debug=debug,
1870    )
1871    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1872    upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries
1873    static = pipe.parameters.get('static', False)
1874    database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
1875    primary_key = pipe.columns.get('primary', None)
1876    autoincrement = pipe.parameters.get('autoincrement', False)
1877    dt_col = pipe.columns.get('datetime', None)
1878    dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
1879
1880    def clean_up_temp_tables(ready_to_drop: bool = False):
1881        log_success, log_msg = self._log_temporary_tables_creation(
1882            [
1883                table
1884                for table in temp_tables.values()
1885            ] if not upsert else [temp_tables['update']],
1886            ready_to_drop=ready_to_drop,
1887            create=(not pipe.temporary),
1888            debug=debug,
1889        )
1890        if not log_success:
1891            warn(log_msg)
1892        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
1893            refresh=False,
1894            debug=debug,
1895        )
1896        if not drop_stale_success:
1897            warn(drop_stale_msg)
1898        return drop_stale_success, drop_stale_msg
1899
1900    sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm')
1901    if not pipe.exists(debug=debug):
1902        create_pipe_queries = get_create_table_queries(
1903            metadef,
1904            pipe.target,
1905            self.flavor,
1906            schema=self.get_pipe_schema(pipe),
1907            primary_key=primary_key,
1908            autoincrement=autoincrement,
1909            datetime_column=dt_col,
1910        )
1911        result = self.exec_queries(create_pipe_queries, debug=debug)
1912        if result is None:
1913            _ = clean_up_temp_tables()
1914            return False, f"Could not insert new data into {pipe} from its SQL query definition."
1915
1916        if not self.create_indices(pipe, debug=debug):
1917            warn(f"Failed to create indices for {pipe}. Continuing...")
1918
1919        rowcount = pipe.get_rowcount(debug=debug)
1920        _ = clean_up_temp_tables()
1921        return True, f"Inserted {rowcount}, updated 0 rows."
1922
1923    session = sqlalchemy_orm.Session(self.engine)
1924    connectable = session if self.flavor != 'duckdb' else self
1925
1926    create_new_query = get_create_table_queries(
1927        metadef,
1928        temp_tables['new' if not upsert else 'update'],
1929        self.flavor,
1930        schema=internal_schema,
1931    )[0]
1932    (create_new_success, create_new_msg), create_new_results = session_execute(
1933        session,
1934        create_new_query,
1935        with_results=True,
1936        debug=debug,
1937    )
1938    if not create_new_success:
1939        _ = clean_up_temp_tables()
1940        return create_new_success, create_new_msg
1941    new_count = create_new_results[0].rowcount if create_new_results else 0
1942
1943    new_cols_types = get_table_cols_types(
1944        temp_tables['new' if not upsert else 'update'],
1945        connectable=connectable,
1946        flavor=self.flavor,
1947        schema=internal_schema,
1948        database=database,
1949        debug=debug,
1950    ) if not static else pipe.get_columns_types(debug=debug)
1951    if not new_cols_types:
1952        return False, f"Failed to get new columns for {pipe}."
1953
1954    new_cols = {
1955        str(col_name): get_pd_type_from_db_type(str(col_type))
1956        for col_name, col_type in new_cols_types.items()
1957    }
1958    new_cols_str = ', '.join([
1959        sql_item_name(col, self.flavor)
1960        for col in new_cols
1961    ])
1962    def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
1963        if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
1964            return new_cols_types[col]
1965        return cols_types[col]
1966
1967    add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
1968    if add_cols_queries:
1969        _ = pipe.__dict__.pop('_columns_types', None)
1970        _ = pipe.__dict__.pop('_columns_indices', None)
1971        self.exec_queries(add_cols_queries, debug=debug)
1972
1973    alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
1974    if alter_cols_queries:
1975        _ = pipe.__dict__.pop('_columns_types', None)
1976        self.exec_queries(alter_cols_queries, debug=debug)
1977
1978    insert_queries = [
1979        (
1980            f"INSERT INTO {pipe_name} ({new_cols_str})\n"
1981            + f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}"
1982        )
1983    ] if not check_existing and not upsert else []
1984
1985    new_queries = insert_queries
1986    new_success, new_msg = (
1987        session_execute(session, new_queries, debug=debug)
1988        if new_queries
1989        else (True, "Success")
1990    )
1991    if not new_success:
1992        _ = clean_up_temp_tables()
1993        return new_success, new_msg
1994
1995    if not check_existing:
1996        session.commit()
1997        _ = clean_up_temp_tables()
1998        return True, f"Inserted {new_count}, updated 0 rows."
1999
2000    (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
2001        session,
2002        [
2003            "SELECT\n"
2004            f"    MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n"
2005            f"    MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n"
2006            f"FROM {temp_table_names['new' if not upsert else 'update']}\n"
2007            f"WHERE {dt_col_name} IS NOT NULL"
2008        ],
2009        with_results=True,
2010        debug=debug,
2011    ) if dt_col and not upsert else ((True, "Success"), None)
2012    if not new_dt_bounds_success:
2013        return (
2014            new_dt_bounds_success,
2015            f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
2016        )
2017
2018    if dt_col and not upsert:
2019        begin, end = new_dt_bounds_results[0].fetchone()
2020
2021    backtrack_def = self.get_pipe_data_query(
2022        pipe,
2023        begin=begin,
2024        end=end,
2025        begin_add_minutes=0,
2026        end_add_minutes=1,
2027        params=params,
2028        debug=debug,
2029        order=None,
2030    )
2031    create_backtrack_query = get_create_table_queries(
2032        backtrack_def,
2033        temp_tables['backtrack'],
2034        self.flavor,
2035        schema=internal_schema,
2036    )[0]
2037    (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute(
2038        session,
2039        create_backtrack_query,
2040        with_results=True,
2041        debug=debug,
2042    ) if not upsert else ((True, "Success"), None)
2043
2044    if not create_backtrack_success:
2045        _ = clean_up_temp_tables()
2046        return create_backtrack_success, create_backtrack_msg
2047
2048    backtrack_cols_types = get_table_cols_types(
2049        temp_tables['backtrack'],
2050        connectable=connectable,
2051        flavor=self.flavor,
2052        schema=internal_schema,
2053        database=database,
2054        debug=debug,
2055    ) if not (upsert or static) else new_cols_types
2056
2057    common_cols = [col for col in new_cols if col in backtrack_cols_types]
2058    on_cols = {
2059        col: new_cols.get(col)
2060        for col_key, col in pipe.columns.items()
2061        if (
2062            col
2063            and
2064            col_key != 'value'
2065            and col in backtrack_cols_types
2066            and col in new_cols
2067        )
2068    }
2069
2070    null_replace_new_cols_str = (
2071        ', '.join([
2072            f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
2073            + get_null_replacement(get_col_typ(col, new_cols), self.flavor)
2074            + ") AS "
2075            + sql_item_name(col, self.flavor, None)
2076            for col, typ in new_cols.items()
2077        ])
2078    )
2079
2080    select_delta_query = (
2081        "SELECT\n"
2082        + null_replace_new_cols_str + "\n"
2083        + f"\nFROM {temp_table_names['new']}\n"
2084        + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n"
2085        + '\nAND\n'.join([
2086            (
2087                f"COALESCE({temp_table_names['new']}."
2088                + sql_item_name(c, self.flavor, None)
2089                + ", "
2090                + get_null_replacement(get_col_typ(c, new_cols), self.flavor)
2091                + ") "
2092                + ' = '
2093                + f"COALESCE({temp_table_names['backtrack']}."
2094                + sql_item_name(c, self.flavor, None)
2095                + ", "
2096                + get_null_replacement(backtrack_cols_types[c], self.flavor)
2097                + ") "
2098            ) for c in common_cols
2099        ])
2100        + "\nWHERE\n"
2101        + '\nAND\n'.join([
2102            (
2103                f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) + ' IS NULL'
2104            ) for c in common_cols
2105        ])
2106    )
2107    create_delta_query = get_create_table_queries(
2108        select_delta_query,
2109        temp_tables['delta'],
2110        self.flavor,
2111        schema=internal_schema,
2112    )[0]
2113    create_delta_success, create_delta_msg = session_execute(
2114        session,
2115        create_delta_query,
2116        debug=debug,
2117    ) if not upsert else (True, "Success")
2118    if not create_delta_success:
2119        _ = clean_up_temp_tables()
2120        return create_delta_success, create_delta_msg
2121
2122    delta_cols_types = get_table_cols_types(
2123        temp_tables['delta'],
2124        connectable=connectable,
2125        flavor=self.flavor,
2126        schema=internal_schema,
2127        database=database,
2128        debug=debug,
2129    ) if not (upsert or static) else new_cols_types
2130
2131    ### NOTE: Work around a SQLite quirk where the backtrack
2132    ### dtypes are sometimes all empty strings.
2133    if not all(delta_cols_types.values()):
2134        delta_cols_types = new_cols_types
2135
2136    delta_cols = {
2137        col: get_pd_type_from_db_type(typ)
2138        for col, typ in delta_cols_types.items()
2139    }
2140    delta_cols_str = ', '.join([
2141        sql_item_name(col, self.flavor)
2142        for col in delta_cols
2143    ])
2144
2145    select_joined_query = (
2146        "SELECT "
2147        + (', '.join([
2148            (
2149                f"{temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
2150                + " AS " + sql_item_name(c + '_delta', self.flavor, None)
2151            ) for c in delta_cols
2152        ]))
2153        + ", "
2154        + (', '.join([
2155            (
2156                f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
2157                + " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
2158            ) for c in backtrack_cols_types
2159        ]))
2160        + f"\nFROM {temp_table_names['delta']}\n"
2161        + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n"
2162        + '\nAND\n'.join([
2163            (
2164                f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
2165                + ", "
2166                + get_null_replacement(
2167                    get_col_typ(c, on_cols),
2168                    self.flavor
2169                ) + ")"
2170                + ' = '
2171                + f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
2172                + ", "
2173                + get_null_replacement(
2174                    get_col_typ(c, on_cols),
2175                    self.flavor
2176                ) + ")"
2177            ) for c, typ in on_cols.items()
2178        ])
2179    )
2180
2181    create_joined_query = get_create_table_queries(
2182        select_joined_query,
2183        temp_tables['joined'],
2184        self.flavor,
2185        schema=internal_schema,
2186    )[0]
2187    create_joined_success, create_joined_msg = session_execute(
2188        session,
2189        create_joined_query,
2190        debug=debug,
2191    ) if on_cols and not upsert else (True, "Success")
2192    if not create_joined_success:
2193        _ = clean_up_temp_tables()
2194        return create_joined_success, create_joined_msg
2195
2196    select_unseen_query = (
2197        "SELECT "
2198        + (', '.join([
2199            (
2200                "CASE\n    WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2201                + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
2202                + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
2203                + "\n    ELSE NULL\nEND "
2204                + " AS " + sql_item_name(c, self.flavor, None)
2205            ) for c, typ in delta_cols.items()
2206        ]))
2207        + f"\nFROM {temp_table_names['joined']}\n"
2208        + "WHERE "
2209        + '\nAND\n'.join([
2210            (
2211                sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
2212            ) for c in delta_cols
2213        ])
2214    )
2215    create_unseen_query = get_create_table_queries(
2216        select_unseen_query,
2217        temp_tables['unseen'],
2218        self.flavor,
2219        internal_schema,
2220    )[0]
2221    (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
2222        session,
2223        create_unseen_query,
2224        with_results=True,
2225        debug=debug
2226    ) if not upsert else ((True, "Success"), None)
2227    if not create_unseen_success:
2228        _ = clean_up_temp_tables()
2229        return create_unseen_success, create_unseen_msg
2230
2231    select_update_query = (
2232        "SELECT "
2233        + (', '.join([
2234            (
2235                "CASE\n    WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2236                + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
2237                + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
2238                + "\n    ELSE NULL\nEND "
2239                + " AS " + sql_item_name(c, self.flavor, None)
2240            ) for c, typ in delta_cols.items()
2241        ]))
2242        + f"\nFROM {temp_table_names['joined']}\n"
2243        + "WHERE "
2244        + '\nOR\n'.join([
2245            (
2246                sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
2247            ) for c in delta_cols
2248        ])
2249    )
2250
2251    create_update_query = get_create_table_queries(
2252        select_update_query,
2253        temp_tables['update'],
2254        self.flavor,
2255        internal_schema,
2256    )[0]
2257    (create_update_success, create_update_msg), create_update_results = session_execute(
2258        session,
2259        create_update_query,
2260        with_results=True,
2261        debug=debug,
2262    ) if on_cols and not upsert else ((True, "Success"), [])
2263    apply_update_queries = (
2264        get_update_queries(
2265            pipe.target,
2266            temp_tables['update'],
2267            session,
2268            on_cols,
2269            upsert=upsert,
2270            schema=self.get_pipe_schema(pipe),
2271            patch_schema=internal_schema,
2272            datetime_col=pipe.columns.get('datetime', None),
2273            flavor=self.flavor,
2274            debug=debug,
2275        )
2276        if on_cols else []
2277    )
2278
2279    apply_unseen_queries = [
2280        (
2281            f"INSERT INTO {pipe_name} ({delta_cols_str})\n"
2282            + f"SELECT {delta_cols_str}\nFROM "
2283            + (
2284                temp_table_names['unseen']
2285                if on_cols
2286                else temp_table_names['delta']
2287            )
2288        ),
2289    ]
2290
2291    (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute(
2292        session,
2293        apply_unseen_queries,
2294        with_results=True,
2295        debug=debug,
2296    ) if not upsert else ((True, "Success"), None)
2297    if not apply_unseen_success:
2298        _ = clean_up_temp_tables()
2299        return apply_unseen_success, apply_unseen_msg
2300    unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0
2301
2302    (apply_update_success, apply_update_msg), apply_update_results = session_execute(
2303        session,
2304        apply_update_queries,
2305        with_results=True,
2306        debug=debug,
2307    )
2308    if not apply_update_success:
2309        _ = clean_up_temp_tables()
2310        return apply_update_success, apply_update_msg
2311    update_count = apply_update_results[0].rowcount if apply_update_results else 0
2312
2313    session.commit()
2314
2315    msg = (
2316        f"Inserted {unseen_count}, updated {update_count} rows."
2317        if not upsert
2318        else f"Upserted {update_count} row" + ('s' if update_count != 1 else '') + "."
2319    )
2320    _ = clean_up_temp_tables(ready_to_drop=True)
2321
2322    return True, msg

If a pipe's connector is the same as its instance connector, it's more efficient to sync the pipe in-place rather than reading data into Pandas.

Parameters
  • pipe (mrsm.Pipe): The pipe whose connector is the same as its instance.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple.
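
For example, a minimal sketch of triggering the in-place path (the connector keys, pipe name, and fetch definition below are illustrative, not from the source):

>>> import meerschaum as mrsm
>>> pipe = mrsm.Pipe(
...     'sql:main', 'weather',
...     instance='sql:main',
...     columns={'datetime': 'dt', 'id': 'id'},
...     parameters={'fetch': {'definition': 'SELECT * FROM weather_src'}},
... )
>>> success, msg = pipe.sync()  ### Connector and instance match, so the sync may run entirely in SQL.
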
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, debug: bool = False) -> Union[datetime.datetime, int, NoneType]:
2325def get_sync_time(
2326    self,
2327    pipe: 'mrsm.Pipe',
2328    params: Optional[Dict[str, Any]] = None,
2329    newest: bool = True,
2330    debug: bool = False,
2331) -> Union[datetime, int, None]:
2332    """Get a Pipe's most recent datetime value.
2333
2334    Parameters
2335    ----------
2336    pipe: mrsm.Pipe
2337        The pipe to get the sync time for.
2338
2339    params: Optional[Dict[str, Any]], default None
2340        Optional params dictionary to build the `WHERE` clause.
2341        See `meerschaum.utils.sql.build_where`.
2342
2343    newest: bool, default True
2344        If `True`, get the most recent datetime (honoring `params`).
2345        If `False`, get the oldest datetime (ASC instead of DESC).
2346
2347    Returns
2348    -------
2349    A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
2350    """
2351    from meerschaum.utils.sql import sql_item_name, build_where
2352    table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2353
2354    dt_col = pipe.columns.get('datetime', None)
2355    if dt_col is None:
2356        return None
2357    dt_col_name = sql_item_name(dt_col, self.flavor, None)
2358
2359    ASC_or_DESC = "DESC" if newest else "ASC"
2360    existing_cols = pipe.get_columns_types(debug=debug)
2361    valid_params = {}
2362    if params is not None:
2363        valid_params = {k: v for k, v in params.items() if k in existing_cols}
2364
2365    ### If no bounds are provided for the datetime column,
2366    ### add IS NOT NULL to the WHERE clause.
2367    if dt_col not in valid_params:
2368        valid_params[dt_col] = '_None'
2369    where = "" if not valid_params else build_where(valid_params, self)
2370    q = f"SELECT {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}\nLIMIT 1"
2371    if self.flavor == 'mssql':
2372        q = f"SELECT TOP 1 {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}"
2373    elif self.flavor == 'oracle':
2374        q = (
2375            "SELECT * FROM (\n"
2376            + f"    SELECT {dt_col_name}\nFROM {table}{where}\n    "
2377            + f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2378            + ") WHERE ROWNUM = 1"
2379        )
2380
2381    try:
2382        db_time = self.value(q, silent=True, debug=debug)
2383
2384        ### No datetime could be found.
2385        if db_time is None:
2386            return None
2387        ### sqlite returns str.
2388        if isinstance(db_time, str):
2389            from meerschaum.utils.packages import attempt_import
2390            dateutil_parser = attempt_import('dateutil.parser')
2391            st = dateutil_parser.parse(db_time)
2392        ### Do nothing if a datetime object is returned.
2393        elif isinstance(db_time, datetime):
2394            if hasattr(db_time, 'to_pydatetime'):
2395                st = db_time.to_pydatetime()
2396            else:
2397                st = db_time
2398        ### Sometimes the datetime is actually a date.
2399        elif isinstance(db_time, date):
2400            st = datetime.combine(db_time, datetime.min.time())
2401        ### Adding support for an integer datetime axis.
2402        elif 'int' in str(type(db_time)).lower():
2403            st = int(db_time)
2404        ### Convert pandas timestamp to Python datetime.
2405        else:
2406            st = db_time.to_pydatetime()
2407
2408        sync_time = st
2409
2410    except Exception as e:
2411        sync_time = None
2412        warn(str(e))
2413
2414    return sync_time

Get a Pipe's most recent datetime value.

Parameters
  • pipe (mrsm.Pipe): The pipe to get the sync time for.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.
  • newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
  • debug (bool, default False): Verbosity toggle.
Returns
  • A datetime object (or int if using an integer axis) if the pipe exists, otherwise None.
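
A quick sketch (connector keys and the return value are illustrative):

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> conn.get_sync_time(pipe)                 ### Most recent datetime value.
>>> conn.get_sync_time(pipe, newest=False)   ### Oldest datetime value instead.
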
def pipe_exists(self, pipe: meerschaum.Pipe, debug: bool = False) -> bool:
2417def pipe_exists(
2418    self,
2419    pipe: mrsm.Pipe,
2420    debug: bool = False
2421) -> bool:
2422    """
2423    Check that a Pipe's table exists.
2424
2425    Parameters
2426    ----------
2427    pipe: mrsm.Pipe
2428        The pipe to check.
2429
2430    debug: bool, default False
2431        Verbosity toggle.
2432
2433    Returns
2434    -------
2435    A `bool` corresponding to whether a pipe's table exists.
2436
2437    """
2438    from meerschaum.utils.sql import table_exists
2439    exists = table_exists(
2440        pipe.target,
2441        self,
2442        schema=self.get_pipe_schema(pipe),
2443        debug=debug,
2444    )
2445    if debug:
2446        from meerschaum.utils.debug import dprint
2447        dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
2448    return exists

Check that a Pipe's table exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to check.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A bool corresponding to whether a pipe's table exists.
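
For instance, reusing the illustrative `conn` and `pipe` from above:

>>> conn.pipe_exists(pipe)
True
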
def get_pipe_rowcount( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, remote: bool = False, debug: bool = False) -> Optional[int]:
2451def get_pipe_rowcount(
2452    self,
2453    pipe: mrsm.Pipe,
2454    begin: Union[datetime, int, None] = None,
2455    end: Union[datetime, int, None] = None,
2456    params: Optional[Dict[str, Any]] = None,
2457    remote: bool = False,
2458    debug: bool = False
2459) -> Union[int, None]:
2460    """
2461    Get the rowcount for a pipe in accordance with given parameters.
2462
2463    Parameters
2464    ----------
2465    pipe: mrsm.Pipe
2466        The pipe to query with.
2467
2468    begin: Union[datetime, int, None], default None
2469        The begin datetime value.
2470
2471    end: Union[datetime, int, None], default None
2472        The end datetime value.
2473
2474    params: Optional[Dict[str, Any]], default None
2475        See `meerschaum.utils.sql.build_where`.
2476
2477    remote: bool, default False
2478        If `True`, get the rowcount for the remote table.
2479
2480    debug: bool, default False
2481        Verbosity toggle.
2482
2483    Returns
2484    -------
2485    An `int` for the number of rows if the `pipe` exists, otherwise `None`.
2486
2487    """
2488    from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
2489    from meerschaum.connectors.sql._fetch import get_pipe_query
2490    if remote:
2491        msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
2492        if 'fetch' not in pipe.parameters:
2493            error(msg)
2494            return None
2495        if 'definition' not in pipe.parameters['fetch']:
2496            error(msg)
2497            return None
2498
2499    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2500
2501    if not pipe.columns.get('datetime', None):
2502        _dt = pipe.guess_datetime()
2503        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
2504        is_guess = True
2505    else:
2506        _dt = pipe.get_columns('datetime')
2507        dt = sql_item_name(_dt, self.flavor, None)
2508        is_guess = False
2509
2510    if begin is not None or end is not None:
2511        if is_guess:
2512            if _dt is None:
2513                warn(
2514                    f"No datetime could be determined for {pipe}."
2515                    + "\n    Ignoring begin and end...",
2516                    stack=False,
2517                )
2518                begin, end = None, None
2519            else:
2520                warn(
2521                    f"A datetime wasn't specified for {pipe}.\n"
2522                    + f"    Using column \"{_dt}\" for datetime bounds...",
2523                    stack=False,
2524                )
2525
2526
2527    _datetime_name = sql_item_name(
2528        _dt,
2529        (
2530            pipe.instance_connector.flavor
2531            if not remote
2532            else pipe.connector.flavor
2533        ),
2534        None,
2535    )
2536    _cols_names = [
2537        sql_item_name(
2538            col,
2539            (
2540                pipe.instance_connector.flavor
2541                if not remote
2542                else pipe.connector.flavor
2543            ),
2544            None,
2545        )
2546        for col in set(
2547            (
2548                [_dt]
2549                if _dt
2550                else []
2551            )
2552            + (
2553                []
2554                if params is None
2555                else list(params.keys())
2556            )
2557        )
2558    ]
2559    if not _cols_names:
2560        _cols_names = ['*']
2561
2562    src = (
2563        f"SELECT {', '.join(_cols_names)} FROM {_pipe_name}"
2564        if not remote
2565        else get_pipe_query(pipe)
2566    )
2567    parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', self.flavor)}"
2568    query = wrap_query_with_cte(src, parent_query, self.flavor)
2569    if begin is not None or end is not None:
2570        query += "\nWHERE"
2571    if begin is not None:
2572        query += f"""
2573        {dt} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin)}
2574        """
2575    if end is not None and begin is not None:
2576        query += "AND"
2577    if end is not None:
2578        query += f"""
2579        {dt} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end)}
2580        """
2581    if params is not None:
2582        from meerschaum.utils.sql import build_where
2583        existing_cols = pipe.get_columns_types(debug=debug)
2584        valid_params = {k: v for k, v in params.items() if k in existing_cols}
2585        if valid_params:
2586            query += build_where(valid_params, self).replace('WHERE', (
2587                'AND' if (begin is not None or end is not None)
2588                    else 'WHERE'
2589                )
2590            )
2591
2592    result = self.value(query, debug=debug, silent=True)
2593    try:
2594        return int(result)
2595    except Exception as e:
2596        return None

Get the rowcount for a pipe in accordance with given parameters.

Parameters
  • pipe (mrsm.Pipe): The pipe to query with.
  • begin (Union[datetime, int, None], default None): The begin datetime value.
  • end (Union[datetime, int, None], default None): The end datetime value.
  • params (Optional[Dict[str, Any]], default None): See meerschaum.utils.sql.build_where.
  • remote (bool, default False): If True, get the rowcount for the remote table.
  • debug (bool, default False): Verbosity toggle.
Returns
  • An int for the number of rows if the pipe exists, otherwise None.
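
A short sketch of bounded and filtered rowcounts (bounds and params are illustrative):

>>> from datetime import datetime
>>> conn.get_pipe_rowcount(pipe)                              ### Total rows.
>>> conn.get_pipe_rowcount(pipe, begin=datetime(2024, 1, 1))  ### Rows on or after `begin`.
>>> conn.get_pipe_rowcount(pipe, params={'id': 1})            ### Rows matching `params`.
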
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kw) -> Tuple[bool, str]:
2599def drop_pipe(
2600    self,
2601    pipe: mrsm.Pipe,
2602    debug: bool = False,
2603    **kw
2604) -> SuccessTuple:
2605    """
2606    Drop a pipe's tables but maintain its registration.
2607
2608    Parameters
2609    ----------
2610    pipe: mrsm.Pipe
2611        The pipe to drop.
2612
2613    Returns
2614    -------
2615    A `SuccessTuple` indicating success.
2616    """
2617    from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
2618    success = True
2619    target = pipe.target
2620    target_name = (
2621        sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
2622    )
2623    if table_exists(target, self, debug=debug):
2624        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
2625        success = self.exec(
2626            f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
2627        ) is not None
2628
2629    msg = "Success" if success else f"Failed to drop {pipe}."
2630    return success, msg

Drop a pipe's tables but maintain its registration.

Parameters
  • pipe (mrsm.Pipe): The pipe to drop.
Returns
  • A SuccessTuple indicating success.
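
For example (note that `pipe.drop()` is the usual entry point and delegates to the instance connector):

>>> success, msg = conn.drop_pipe(pipe)
>>> pipe.exists()
False
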
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
2633def clear_pipe(
2634    self,
2635    pipe: mrsm.Pipe,
2636    begin: Union[datetime, int, None] = None,
2637    end: Union[datetime, int, None] = None,
2638    params: Optional[Dict[str, Any]] = None,
2639    debug: bool = False,
2640    **kw
2641) -> SuccessTuple:
2642    """
2643    Delete a pipe's data within a bounded or unbounded interval without dropping the table.
2644
2645    Parameters
2646    ----------
2647    pipe: mrsm.Pipe
2648        The pipe to clear.
2649        
2650    begin: Union[datetime, int, None], default None
2651        Beginning datetime. Inclusive.
2652
2653    end: Union[datetime, int, None], default None
2654         Ending datetime. Exclusive.
2655
2656    params: Optional[Dict[str, Any]], default None
2657         See `meerschaum.utils.sql.build_where`.
2658
2659    """
2660    if not pipe.exists(debug=debug):
2661        return True, f"{pipe} does not exist, so nothing was cleared."
2662
2663    from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
2664    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2665
2666    if not pipe.columns.get('datetime', None):
2667        _dt = pipe.guess_datetime()
2668        dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None
2669        is_guess = True
2670    else:
2671        _dt = pipe.get_columns('datetime')
2672        dt_name = sql_item_name(_dt, self.flavor, None)
2673        is_guess = False
2674
2675    if begin is not None or end is not None:
2676        if is_guess:
2677            if _dt is None:
2678                warn(
2679                    f"No datetime could be determined for {pipe}."
2680                    + "\n    Ignoring datetime bounds...",
2681                    stack = False,
2682                )
2683                begin, end = None, None
2684            else:
2685                warn(
2686                    f"A datetime wasn't specified for {pipe}.\n"
2687                    + f"    Using column \"{_dt}\" for datetime bounds...",
2688                    stack = False,
2689                )
2690
2691    valid_params = {}
2692    if params is not None:
2693        existing_cols = pipe.get_columns_types(debug=debug)
2694        valid_params = {k: v for k, v in params.items() if k in existing_cols}
2695    clear_query = (
2696        f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n"
2697        + ('  AND ' + build_where(valid_params, self, with_where=False) if valid_params else '')
2698        + (
2699            f'  AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin)
2700            if begin is not None else ''
2701        ) + (
2702            f'  AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end)
2703            if end is not None else ''
2704        )
2705    )
2706    success = self.exec(clear_query, silent=True, debug=debug) is not None
2707    msg = "Success" if success else f"Failed to clear {pipe}."
2708    return success, msg

Delete a pipe's data within a bounded or unbounded interval without dropping the table.

Parameters
  • pipe (mrsm.Pipe): The pipe to clear.
  • begin (Union[datetime, int, None], default None): Beginning datetime. Inclusive.
  • end (Union[datetime, int, None], default None): Ending datetime. Exclusive.
  • params (Optional[Dict[str, Any]], default None): See meerschaum.utils.sql.build_where.
Returns
  • A SuccessTuple indicating success.
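
A minimal sketch of a bounded clear (dates are illustrative):

>>> from datetime import datetime
>>> success, msg = conn.clear_pipe(
...     pipe,
...     begin=datetime(2024, 1, 1),  ### Delete rows where dt >= begin...
...     end=datetime(2024, 2, 1),    ### ...and dt < end.
... )
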
def deduplicate_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
3299def deduplicate_pipe(
3300    self,
3301    pipe: mrsm.Pipe,
3302    begin: Union[datetime, int, None] = None,
3303    end: Union[datetime, int, None] = None,
3304    params: Optional[Dict[str, Any]] = None,
3305    debug: bool = False,
3306    **kwargs: Any
3307) -> SuccessTuple:
3308    """
3309    Delete duplicate values within a pipe's table.
3310
3311    Parameters
3312    ----------
3313    pipe: mrsm.Pipe
3314        The pipe whose table to deduplicate.
3315
3316    begin: Union[datetime, int, None], default None
3317        If provided, only deduplicate values greater than or equal to this value.
3318
3319    end: Union[datetime, int, None], default None
3320        If provided, only deduplicate values less than this value.
3321
3322    params: Optional[Dict[str, Any]], default None
3323        If provided, further limit deduplication to values which match this query dictionary.
3324
3325    debug: bool, default False
3326        Verbosity toggle.
3327
3328    Returns
3329    -------
3330    A `SuccessTuple` indicating success.
3331    """
3332    from meerschaum.utils.sql import (
3333        sql_item_name,
3334        NO_CTE_FLAVORS,
3335        get_rename_table_queries,
3336        NO_SELECT_INTO_FLAVORS,
3337        DROP_IF_EXISTS_FLAVORS,
3338        get_create_table_query,
3339        format_cte_subquery,
3340        get_null_replacement,
3341    )
3342    from meerschaum.utils.misc import generate_password, flatten_list
3343
3344    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
3345
3346    if not pipe.exists(debug=debug):
3347        return False, f"Table {pipe_table_name} does not exist."
3348
3349    ### TODO: Handle deleting duplicates without a datetime axis.
3350    dt_col = pipe.columns.get('datetime', None)
3351    dt_col_name = sql_item_name(dt_col, self.flavor, None)
3352    cols_types = pipe.get_columns_types(debug=debug)
3353    existing_cols = pipe.get_columns_types(debug=debug)
3354
3355    get_rowcount_query = f"SELECT COUNT(*) FROM {pipe_table_name}"
3356    old_rowcount = self.value(get_rowcount_query, debug=debug)
3357    if old_rowcount is None:
3358        return False, f"Failed to get rowcount for table {pipe_table_name}."
3359
3360    ### Non-datetime indices that in fact exist.
3361    indices = [
3362        col
3363        for key, col in pipe.columns.items()
3364        if col and col != dt_col and col in cols_types
3365    ]
3366    indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
3367    existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
3368    duplicates_cte_name = sql_item_name('dups', self.flavor, None)
3369    duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
3370    previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
3371
3372    index_list_str = (
3373        sql_item_name(dt_col, self.flavor, None)
3374        if dt_col
3375        else ''
3376    )
3377    index_list_str_ordered = (
3378        (
3379            sql_item_name(dt_col, self.flavor, None) + " DESC"
3380        )
3381        if dt_col
3382        else ''
3383    )
3384    if indices:
3385        index_list_str += ', ' + ', '.join(indices_names)
3386        index_list_str_ordered += ', ' + ', '.join(indices_names)
3387    if index_list_str.startswith(','):
3388        index_list_str = index_list_str.lstrip(',').lstrip()
3389    if index_list_str_ordered.startswith(','):
3390        index_list_str_ordered = index_list_str_ordered.lstrip(',').lstrip()
3391
3392    cols_list_str = ', '.join(existing_cols_names)
3393
3394    try:
3395        ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()).
3396        is_old_mysql = (
3397            self.flavor in ('mysql', 'mariadb')
3398            and
3399            int(self.db_version.split('.')[0]) < 8
3400        )
3401    except Exception as e:
3402        is_old_mysql = False
3403
3404    src_query = f"""
3405        SELECT
3406            {cols_list_str},
3407            ROW_NUMBER() OVER (
3408                PARTITION BY
3409                {index_list_str}
3410                ORDER BY {index_list_str_ordered}
3411            ) AS {duplicate_row_number_name}
3412        FROM {pipe_table_name}
3413    """
3414    duplicates_cte_subquery = format_cte_subquery(
3415        src_query,
3416        self.flavor,
3417        sub_name = 'src',
3418        cols_to_select = cols_list_str,
3419    ) + f"""
3420        WHERE {duplicate_row_number_name} = 1
3421        """
3422    old_mysql_query = (
3423        f"""
3424        SELECT
3425            {index_list_str}
3426        FROM (
3427          SELECT
3428            {index_list_str},
3429            IF(
3430                @{previous_row_number_name} <> {index_list_str.replace(', ', ' + ')},
3431                @{duplicate_row_number_name} := 0,
3432                @{duplicate_row_number_name}
3433            ),
3434            @{previous_row_number_name} := {index_list_str.replace(', ', ' + ')},
3435            @{duplicate_row_number_name} := @{duplicate_row_number_name} + 1 AS """
3436        + f"""{duplicate_row_number_name}
3437          FROM
3438            {pipe_table_name},
3439            (
3440                SELECT @{duplicate_row_number_name} := 0
3441            ) AS {duplicate_row_number_name},
3442            (
3443                SELECT @{previous_row_number_name} := '{get_null_replacement('str', 'mysql')}'
3444            ) AS {previous_row_number_name}
3445          ORDER BY {index_list_str_ordered}
3446        ) AS t
3447        WHERE {duplicate_row_number_name} = 1
3448        """
3449    )
3450    if is_old_mysql:
3451        duplicates_cte_subquery = old_mysql_query
3452
3453    session_id = generate_password(3)
3454
3455    dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
3456    temp_old_table = '-' + session_id + f"_old_{pipe.target}"
3457
3458    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
3459    temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3460
3461    create_temporary_table_query = get_create_table_query(
3462        duplicates_cte_subquery,
3463        dedup_table,
3464        self.flavor,
3465    ) + f"""
3466    ORDER BY {index_list_str_ordered}
3467    """
3468    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
3469    alter_queries = flatten_list([
3470        get_rename_table_queries(
3471            pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe)
3472        ),
3473        get_rename_table_queries(
3474            dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
3475        ),
3476        f"""
3477        DROP TABLE {if_exists_str} {temp_old_table_name}
3478        """,
3479    ])
3480
3481    create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
3482    if create_temporary_result is None:
3483        return False, f"Failed to deduplicate table {pipe_table_name}."
3484
3485    results = self.exec_queries(
3486        alter_queries,
3487        break_on_error=True,
3488        rollback=True,
3489        debug=debug,
3490    )
3491
3492    fail_query = None
3493    for result, query in zip(results, alter_queries):
3494        if result is None:
3495            fail_query = query
3496            break
3497    success = fail_query is None
3498
3499    new_rowcount = (
3500        self.value(get_rowcount_query, debug=debug)
3501        if success
3502        else None
3503    )
3504
3505    msg = (
3506        (
3507            f"Successfully deduplicated table {pipe_table_name}"
3508            + (
3509                f"\nfrom {old_rowcount} to {new_rowcount} rows"
3510                if old_rowcount != new_rowcount
3511                else ''
3512            )
3513            + '.'
3514        )
3515        if success
3516        else f"Failed to execute query:\n{fail_query}"
3517    )
3518    return success, msg

Delete duplicate values within a pipe's table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose table to deduplicate.
  • begin (Union[datetime, int, None], default None): If provided, only deduplicate values greater than or equal to this value.
  • end (Union[datetime, int, None], default None): If provided, only deduplicate values less than this value.
  • params (Optional[Dict[str, Any]], default None): If provided, further limit deduplication to values which match this query dictionary.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success.
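
For example, deduplicating an entire table (bounds are optional; `pipe.deduplicate()` typically delegates here on SQL instances):

>>> success, msg = conn.deduplicate_pipe(pipe)
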
def get_pipe_table( self, pipe: meerschaum.Pipe, debug: bool = False) -> "Union['sqlalchemy.Table', None]":
2711def get_pipe_table(
2712    self,
2713    pipe: mrsm.Pipe,
2714    debug: bool = False,
2715) -> Union['sqlalchemy.Table', None]:
2716    """
2717    Return the `sqlalchemy.Table` object for a `mrsm.Pipe`.
2718
2719    Parameters
2720    ----------
2721    pipe: mrsm.Pipe
2722        The pipe in question.
2723
2724    Returns
2725    -------
2726    A `sqlalchemy.Table` object, or `None` if the pipe's table does not exist.
2727
2728    """
2729    from meerschaum.utils.sql import get_sqlalchemy_table
2730    if not pipe.exists(debug=debug):
2731        return None
2732    return get_sqlalchemy_table(
2733        pipe.target,
2734        connector=self,
2735        schema=self.get_pipe_schema(pipe),
2736        debug=debug,
2737        refresh=True,
2738    )

Return the sqlalchemy.Table object for a mrsm.Pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe in question.
Returns
  • A sqlalchemy.Table object, or None if the pipe's table does not exist.
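
A quick sketch (column names are illustrative):

>>> table = conn.get_pipe_table(pipe)
>>> list(table.columns.keys()) if table is not None else None
['dt', 'id', 'val']
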
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, str]:
2741def get_pipe_columns_types(
2742    self,
2743    pipe: mrsm.Pipe,
2744    debug: bool = False,
2745) -> Dict[str, str]:
2746    """
2747    Get the pipe's columns and types.
2748
2749    Parameters
2750    ----------
2751    pipe: mrsm.Pipe
2752        The pipe to get the columns for.
2753
2754    Returns
2755    -------
2756    A dictionary of column names (`str`) and types (`str`).
2757
2758    Examples
2759    --------
2760    >>> conn.get_pipe_columns_types(pipe)
2761    {
2762      'dt': 'TIMESTAMP WITHOUT TIMEZONE',
2763      'id': 'BIGINT',
2764      'val': 'DOUBLE PRECISION',
2765    }
2766    >>> 
2767    """
2768    from meerschaum.utils.sql import get_table_cols_types
2769    if not pipe.exists(debug=debug):
2770        return {}
2771
2772    if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite'):
2773        return get_table_cols_types(
2774            pipe.target,
2775            self,
2776            flavor=self.flavor,
2777            schema=self.get_pipe_schema(pipe),
2778            debug=debug,
2779        )
2780
2781    table_columns = {}
2782    try:
2783        pipe_table = self.get_pipe_table(pipe, debug=debug)
2784        if pipe_table is None:
2785            return {}
2786        for col in pipe_table.columns:
2787            table_columns[str(col.name)] = str(col.type)
2788    except Exception as e:
2789        import traceback
2790        traceback.print_exc()
2791        warn(e)
2792        table_columns = {}
2793
2794    return table_columns

Get the pipe's columns and types.

Parameters
  • pipe (mrsm.Pipe): The pipe to get the columns for.
Returns
  • A dictionary of column names (str) and types (str).
Examples
>>> conn.get_pipe_columns_types(pipe)
{
  'dt': 'TIMESTAMP WITHOUT TIMEZONE',
  'id': 'BIGINT',
  'val': 'DOUBLE PRECISION',
}
>>>
def get_to_sql_dtype( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'", update_dtypes: bool = True) -> "Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']":
3244def get_to_sql_dtype(
3245    self,
3246    pipe: 'mrsm.Pipe',
3247    df: 'pd.DataFrame',
3248    update_dtypes: bool = True,
3249) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
3250    """
3251    Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
3252
3253    Parameters
3254    ----------
3255    pipe: mrsm.Pipe
3256        The pipe which may contain a `dtypes` parameter.
3257
3258    df: pd.DataFrame
3259        The DataFrame to be pushed via `to_sql()`.
3260
3261    update_dtypes: bool, default True
3262        If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.
3263
3264    Returns
3265    -------
3266    A dictionary with `sqlalchemy` datatypes.
3267
3268    Examples
3269    --------
3270    >>> import pandas as pd
3271    >>> import meerschaum as mrsm
3272    >>> 
3273    >>> conn = mrsm.get_connector('sql:memory')
3274    >>> df = pd.DataFrame([{'a': {'b': 1}}])
3275    >>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
3276    >>> get_to_sql_dtype(pipe, df)
3277    {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3278    """
3279    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
3280    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
3281    df_dtypes = {
3282        col: str(typ)
3283        for col, typ in df.dtypes.items()
3284    }
3285    json_cols = get_json_cols(df)
3286    numeric_cols = get_numeric_cols(df)
3287    uuid_cols = get_uuid_cols(df)
3288    df_dtypes.update({col: 'json' for col in json_cols})
3289    df_dtypes.update({col: 'numeric' for col in numeric_cols})
3290    df_dtypes.update({col: 'uuid' for col in uuid_cols})
3291    if update_dtypes:
3292        df_dtypes.update(pipe.dtypes)
3293    return {
3294        col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
3295        for col, typ in df_dtypes.items()
3296    }

Given a pipe and DataFrame, return the dtype dictionary for to_sql().

Parameters
  • pipe (mrsm.Pipe): The pipe which may contain a dtypes parameter.
  • df (pd.DataFrame): The DataFrame to be pushed via to_sql().
  • update_dtypes (bool, default True): If True, patch the pipe's dtypes onto the DataFrame's dtypes.
Returns
  • A dictionary with sqlalchemy datatypes.
Examples
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> 
>>> conn = mrsm.get_connector('sql:memory')
>>> df = pd.DataFrame([{'a': {'b': 1}}])
>>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
>>> get_to_sql_dtype(pipe, df)
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
def get_pipe_schema(self, pipe: meerschaum.Pipe) -> Optional[str]:
3521def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
3522    """
3523    Return the schema to use for this pipe.
3524    First check `pipe.parameters['schema']`, then check `self.schema`.
3525
3526    Parameters
3527    ----------
3528    pipe: mrsm.Pipe
3529        The pipe which may contain a configured schema.
3530
3531    Returns
3532    -------
3533    A schema string or `None` if nothing is configured.
3534    """
3535    return pipe.parameters.get('schema', self.schema)

Return the schema to use for this pipe. First check pipe.parameters['schema'], then check self.schema.

Parameters
  • pipe (mrsm.Pipe): The pipe which may contain a configured schema.
Returns
  • A schema string or None if nothing is configured.
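
For instance (the schema name is illustrative):

>>> pipe.parameters['schema'] = 'analytics'
>>> conn.get_pipe_schema(pipe)
'analytics'
>>> del pipe.parameters['schema']
>>> conn.get_pipe_schema(pipe) == conn.schema  ### Falls back to the connector's schema.
True
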
def create_pipe_table_from_df( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'", debug: bool = False) -> Tuple[bool, str]:
1334def create_pipe_table_from_df(
1335    self,
1336    pipe: mrsm.Pipe,
1337    df: 'pd.DataFrame',
1338    debug: bool = False,
1339) -> mrsm.SuccessTuple:
1340    """
1341    Create a pipe's table from its configured dtypes and an incoming dataframe.
1342    """
1343    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
1344    from meerschaum.utils.sql import get_create_table_queries, sql_item_name
1345    primary_key = pipe.columns.get('primary', None)
1346    dt_col = pipe.columns.get('datetime', None)
1347    new_dtypes = {
1348        **{
1349            col: str(typ)
1350            for col, typ in df.dtypes.items()
1351        },
1352        **{
1353            col: str(df.dtypes.get(col, 'int'))
1354            for col_ix, col in pipe.columns.items()
1355            if col and col_ix != 'primary'
1356        },
1357        **{
1358            col: 'uuid'
1359            for col in get_uuid_cols(df)
1360        },
1361        **{
1362            col: 'json'
1363            for col in get_json_cols(df)
1364        },
1365        **{
1366            col: 'numeric'
1367            for col in get_numeric_cols(df)
1368        },
1369        **pipe.dtypes
1370    }
1371    autoincrement = (
1372        pipe.parameters.get('autoincrement', False)
1373        or (primary_key and primary_key not in new_dtypes)
1374    )
1375    if autoincrement:
1376        _ = new_dtypes.pop(primary_key, None)
1377
1378    create_table_queries = get_create_table_queries(
1379        new_dtypes,
1380        pipe.target,
1381        self.flavor,
1382        schema=self.get_pipe_schema(pipe),
1383        primary_key=primary_key,
1384        datetime_column=dt_col,
1385    )
1386    success = all(
1387        self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
1388    )
1389    target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
1390    msg = (
1391        "Success"
1392        if success
1393        else f"Failed to create {target_name}."
1394    )
1395    return success, msg

Create a pipe's table from its configured dtypes and an incoming dataframe.
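
A minimal sketch (the frame is illustrative; dtypes are merged from the DataFrame, the pipe's index columns, special types such as `json`, `numeric`, and `uuid`, and finally `pipe.dtypes`, as in the source above):

>>> import pandas as pd
>>> df = pd.DataFrame([{'dt': '2024-01-01', 'id': 1, 'val': 1.0}])
>>> success, msg = conn.create_pipe_table_from_df(pipe, df)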

def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[Dict[str, str]]]:
2797def get_pipe_columns_indices(
2798    self,
2799    pipe: mrsm.Pipe,
2800    debug: bool = False,
2801) -> Dict[str, List[Dict[str, str]]]:
2802    """
2803    Return a dictionary mapping columns to the indices created on those columns.
2804
2805    Parameters
2806    ----------
2807    pipe: mrsm.Pipe
2808        The pipe to be queried against.
2809
2810    Returns
2811    -------
2812    A dictionary mapping column names to lists of dictionaries.
2813    The dictionaries in the lists contain the name and type of the indices.
2814    """
2815    if pipe.__dict__.get('_skip_check_indices', False):
2816        return {}
2817    from meerschaum.utils.sql import get_table_cols_indices
2818    return get_table_cols_indices(
2819        pipe.target,
2820        self,
2821        flavor=self.flavor,
2822        schema=self.get_pipe_schema(pipe),
2823        debug=debug,
2824    )

Return a dictionary mapping columns to the indices created on those columns.

Parameters
  • pipe (mrsm.Pipe): The pipe to be queried against.
Returns
  • A dictionary mapping column names to lists of dictionaries.
  • The dictionaries in the lists contain the name and type of the indices.
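
The shape of the return value, sketched with illustrative index names:

>>> conn.get_pipe_columns_indices(pipe)
{'dt': [{'name': 'IX_weather_dt', 'type': 'INDEX'}], 'id': [{'name': 'IX_weather_id', 'type': 'INDEX'}]}
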
def register_plugin( self, plugin: meerschaum.Plugin, force: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
17def register_plugin(
18    self,
19    plugin: 'mrsm.core.Plugin',
20    force: bool = False,
21    debug: bool = False,
22    **kw: Any
23) -> SuccessTuple:
24    """Register a new plugin to the plugins table."""
25    from meerschaum.utils.warnings import warn, error
26    from meerschaum.utils.packages import attempt_import
27    sqlalchemy = attempt_import('sqlalchemy')
28    from meerschaum.utils.sql import json_flavors
29    from meerschaum.connectors.sql.tables import get_tables
30    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
31
32    old_id = self.get_plugin_id(plugin, debug=debug)
33
34    ### Check for version conflict. May be overridden with `--force`.
35    if old_id is not None and not force:
36        old_version = self.get_plugin_version(plugin, debug=debug)
37        new_version = plugin.version
38        if old_version is None:
39            old_version = ''
40        if new_version is None:
41            new_version = ''
42
43        ### verify that the new version is greater than the old
44        packaging_version = attempt_import('packaging.version')
45        if (
46            old_version and new_version
47            and packaging_version.parse(old_version) >= packaging_version.parse(new_version)
48        ):
49            return False, (
50                f"Version '{new_version}' of plugin '{plugin}' " +
51                f"must be greater than existing version '{old_version}'."
52            )
53
54    bind_variables = {
55        'plugin_name': plugin.name,
56        'version': plugin.version,
57        'attributes': (
58            json.dumps(plugin.attributes) if self.flavor not in json_flavors else plugin.attributes
59        ),
60        'user_id': plugin.user_id,
61    }
62
63    if old_id is None:
64        query = sqlalchemy.insert(plugins_tbl).values(**bind_variables)
65    else:
66        query = (
67            sqlalchemy.update(plugins_tbl)
68            .values(**bind_variables)
69            .where(plugins_tbl.c.plugin_id == old_id)
70        )
71
72    result = self.exec(query, debug=debug)
73    if result is None:
74        return False, f"Failed to register plugin '{plugin}'."
75    return True, f"Successfully registered plugin '{plugin}'."

Register a new plugin to the plugins table.

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
243def delete_plugin(
244    self,
245    plugin: 'mrsm.core.Plugin',
246    debug: bool = False,
247    **kw: Any
248) -> SuccessTuple:
249    """Delete a plugin from the plugins table."""
250    from meerschaum.utils.warnings import warn, error
251    from meerschaum.utils.packages import attempt_import
252    sqlalchemy = attempt_import('sqlalchemy')
253    from meerschaum.connectors.sql.tables import get_tables
254    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
255
256    plugin_id = self.get_plugin_id(plugin, debug=debug)
257    if plugin_id is None:
258        return True, f"Plugin '{plugin}' was not registered."
259
260    bind_variables = {
261        'plugin_id' : plugin_id,
262    }
263
264    query = sqlalchemy.delete(plugins_tbl).where(plugins_tbl.c.plugin_id == plugin_id)
265    result = self.exec(query, debug=debug)
266    if result is None:
267        return False, f"Failed to delete plugin '{plugin}'."
268    return True, f"Successfully deleted plugin '{plugin}'."

Delete a plugin from the plugins table.

def get_plugin_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[int]:
 77def get_plugin_id(
 78    self,
 79    plugin: 'mrsm.core.Plugin',
 80    debug: bool = False
 81) -> Optional[int]:
 82    """
 83    Return a plugin's ID.
 84    """
 85    ### ensure plugins table exists
 86    from meerschaum.connectors.sql.tables import get_tables
 87    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
 88    from meerschaum.utils.packages import attempt_import
 89    sqlalchemy = attempt_import('sqlalchemy')
 90
 91    query = (
 92        sqlalchemy
 93        .select(plugins_tbl.c.plugin_id)
 94        .where(plugins_tbl.c.plugin_name == plugin.name)
 95    )
 96    
 97    try:
 98        return int(self.value(query, debug=debug))
 99    except Exception as e:
100        return None

Return a plugin's ID.

def get_plugin_version( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
102def get_plugin_version(
103    self,
104    plugin: 'mrsm.core.Plugin',
105    debug: bool = False
106) -> Optional[str]:
107    """
108    Return a plugin's version.
109    """
110    ### ensure plugins table exists
111    from meerschaum.connectors.sql.tables import get_tables
112    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
113    from meerschaum.utils.packages import attempt_import
114    sqlalchemy = attempt_import('sqlalchemy')
115    query = sqlalchemy.select(plugins_tbl.c.version).where(plugins_tbl.c.plugin_name == plugin.name)
116    return self.value(query, debug=debug)

Return a plugin's version.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False, **kw: Any) -> List[str]:
196def get_plugins(
197    self,
198    user_id: Optional[int] = None,
199    search_term: Optional[str] = None,
200    debug: bool = False,
201    **kw: Any
202) -> List[str]:
203    """
204    Return a list of all registered plugins.
205
206    Parameters
207    ----------
208    user_id: Optional[int], default None
209        If specified, filter plugins by a specific `user_id`.
210
211    search_term: Optional[str], default None
212        If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins.
213
214
215    Returns
216    -------
217    A list of plugin names.
218    """
219    ### ensure plugins table exists
220    from meerschaum.connectors.sql.tables import get_tables
221    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
222    from meerschaum.utils.packages import attempt_import
223    sqlalchemy = attempt_import('sqlalchemy')
224
225    query = sqlalchemy.select(plugins_tbl.c.plugin_name)
226    if user_id is not None:
227        query = query.where(plugins_tbl.c.user_id == user_id)
228    if search_term is not None:
229        query = query.where(plugins_tbl.c.plugin_name.like(search_term + '%'))
230
231    rows = (
232        self.execute(query).fetchall()
233        if self.flavor != 'duckdb'
234        else [
235            (row['plugin_name'],)
236            for row in self.read(query).to_dict(orient='records')
237        ]
238    )
239    
240    return [row[0] for row in rows]

Return a list of all registered plugins.

Parameters
  • user_id (Optional[int], default None): If specified, filter plugins by a specific user_id.
  • search_term (Optional[str], default None): If specified, add a WHERE plugin_name LIKE '{search_term}%' clause to filter the plugins.
Returns
  • A list of plugin names.
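
For example (plugin names are illustrative):

>>> conn.get_plugins()
['noaa', 'color']
>>> conn.get_plugins(search_term='no')  ### Matches plugin_name LIKE 'no%'.
['noaa']
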
def get_plugin_user_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[int]:
118def get_plugin_user_id(
119    self,
120    plugin: 'mrsm.core.Plugin',
121    debug: bool = False
122) -> Optional[int]:
123    """
124    Return a plugin's user ID.
125    """
126    ### ensure plugins table exists
127    from meerschaum.connectors.sql.tables import get_tables
128    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
129    from meerschaum.utils.packages import attempt_import
130    sqlalchemy = attempt_import('sqlalchemy')
131
132    query = (
133        sqlalchemy
134        .select(plugins_tbl.c.user_id)
135        .where(plugins_tbl.c.plugin_name == plugin.name)
136    )
137
138    try:
139        return int(self.value(query, debug=debug))
140    except Exception as e:
141        return None

Return a plugin's user ID.

def get_plugin_username( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
143def get_plugin_username(
144    self,
145    plugin: 'mrsm.core.Plugin',
146    debug: bool = False
147) -> Optional[str]:
148    """
149    Return the username of a plugin's owner.
150    """
151    ### ensure plugins table exists
152    from meerschaum.connectors.sql.tables import get_tables
153    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
154    users = get_tables(mrsm_instance=self, debug=debug)['users']
155    from meerschaum.utils.packages import attempt_import
156    sqlalchemy = attempt_import('sqlalchemy')
157
158    query = (
159        sqlalchemy.select(users.c.username)
160        .where(
161            sqlalchemy.and_(users.c.user_id == plugins_tbl.c.user_id,
162                plugins_tbl.c.plugin_name == plugin.name)  ### `and_`, not Python's `and`, which drops a clause.
163        )
164    )
165
166    return self.value(query, debug=debug)

Return the username of a plugin's owner.

def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
169def get_plugin_attributes(
170    self,
171    plugin: 'mrsm.core.Plugin',
172    debug: bool = False
173) -> Dict[str, Any]:
174    """
175    Return the attributes of a plugin.
176    """
177    ### ensure plugins table exists
178    from meerschaum.connectors.sql.tables import get_tables
179    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
180    from meerschaum.utils.packages import attempt_import
181    sqlalchemy = attempt_import('sqlalchemy')
182
183    query = (
184        sqlalchemy
185        .select(plugins_tbl.c.attributes)
186        .where(plugins_tbl.c.plugin_name == plugin.name)
187    )
188
189    _attr = self.value(query, debug=debug)
190    if isinstance(_attr, str):
191        _attr = json.loads(_attr)
192    elif _attr is None:
193        _attr = {}
194    return _attr

Return the attributes of a plugin.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
16def register_user(
17    self,
18    user: mrsm.core.User,
19    debug: bool = False,
20    **kw: Any
21) -> SuccessTuple:
22    """Register a new user."""
23    from meerschaum.utils.warnings import warn, error, info
24    from meerschaum.utils.packages import attempt_import
25    from meerschaum.utils.sql import json_flavors
26    sqlalchemy = attempt_import('sqlalchemy')
27
28    valid_tuple = valid_username(user.username)
29    if not valid_tuple[0]:
30        return valid_tuple
31
32    old_id = self.get_user_id(user, debug=debug)
33
34    if old_id is not None:
35        return False, f"User '{user}' already exists."
36
37    ### ensure users table exists
38    from meerschaum.connectors.sql.tables import get_tables
39    tables = get_tables(mrsm_instance=self, debug=debug)
40
41    import json
42    bind_variables = {
43        'username': user.username,
44        'email': user.email,
45        'password_hash': user.password_hash,
46        'user_type': user.type,
47        'attributes': (
48            json.dumps(user.attributes) if self.flavor not in json_flavors else user.attributes
49        ),
50    }
51    if old_id is not None:
52        return False, f"User '{user.username}' already exists."
53    if old_id is None:
54        query = (
55            sqlalchemy.insert(tables['users']).
56            values(**bind_variables)
57        )
58
59    result = self.exec(query, debug=debug)
60    if result is None:
61        return False, f"Failed to register user '{user}'."
62    return True, f"Successfully registered user '{user}'."

Register a new user.
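
A hedged sketch, assuming `meerschaum.core.User` accepts a username and password (credentials are illustrative):

>>> from meerschaum.core import User
>>> user = User('mrsm', 'changeme')
>>> success, msg = conn.register_user(user)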

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[int]:
154def get_user_id(
155    self,
156    user: 'mrsm.core.User',
157    debug: bool = False
158) -> Optional[int]:
159    """If a user is registered, return the `user_id`."""
160    ### ensure users table exists
161    from meerschaum.utils.packages import attempt_import
162    sqlalchemy = attempt_import('sqlalchemy')
163    from meerschaum.connectors.sql.tables import get_tables
164    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
165
166    query = (
167        sqlalchemy.select(users_tbl.c.user_id)
168        .where(users_tbl.c.username == user.username)
169    )
170
171    result = self.value(query, debug=debug)
172    if result is not None:
173        return int(result)
174    return None

If a user is registered, return the user_id.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
248def get_users(
249    self,
250    debug: bool = False,
251    **kw: Any
252) -> List[str]:
253    """
254    Get the registered usernames.
255    """
256    ### ensure users table exists
257    from meerschaum.connectors.sql.tables import get_tables
258    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
259    from meerschaum.utils.packages import attempt_import
260    sqlalchemy = attempt_import('sqlalchemy')
261
262    query = sqlalchemy.select(users_tbl.c.username)
263
264    return list(self.read(query, debug=debug)['username'])

Get the registered usernames.
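
For instance (the username is illustrative):

>>> conn.get_users()
['mrsm']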

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 99def edit_user(
100    self,
101    user: 'mrsm.core.User',
102    debug: bool = False,
103    **kw: Any
104) -> SuccessTuple:
105    """Update an existing user's metadata."""
106    from meerschaum.utils.packages import attempt_import
107    sqlalchemy = attempt_import('sqlalchemy')
108    from meerschaum.connectors.sql.tables import get_tables
109    from meerschaum.utils.sql import json_flavors
110    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
111
112    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
113    if user_id is None:
114        return False, (
115            f"User '{user.username}' does not exist. " +
116            f"Register user '{user.username}' before editing."
117        )
118    user.user_id = user_id
119
120    import json
121    valid_tuple = valid_username(user.username)
122    if not valid_tuple[0]:
123        return valid_tuple
124
125    bind_variables = {
126        'user_id' : user_id,
127        'username' : user.username,
128    }
129    if user.password != '':
130        bind_variables['password_hash'] = user.password_hash
131    if user.email != '':
132        bind_variables['email'] = user.email
133    if user.attributes is not None and user.attributes != {}:
134        bind_variables['attributes'] = (
135            json.dumps(user.attributes) if self.flavor in ('duckdb',)
136            else user.attributes
137        )
138    if user.type != '':
139        bind_variables['user_type'] = user.type
140
141    query = (
142        sqlalchemy
143        .update(users_tbl)
144        .values(**bind_variables)
145        .where(users_tbl.c.user_id == user_id)
146    )
147
148    result = self.exec(query, debug=debug)
149    if result is None:
150        return False, f"Failed to edit user '{user}'."
151    return True, f"Successfully edited user '{user}'."

Update an existing user's metadata.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
216def delete_user(
217    self,
218    user: 'mrsm.core.User',
219    debug: bool = False
220) -> SuccessTuple:
221    """Delete a user's record from the users table."""
222    ### ensure users table exists
223    from meerschaum.connectors.sql.tables import get_tables
224    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
225    plugins = get_tables(mrsm_instance=self, debug=debug)['plugins']
226    from meerschaum.utils.packages import attempt_import
227    sqlalchemy = attempt_import('sqlalchemy')
228
229    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
230
231    if user_id is None:
232        return False, f"User '{user.username}' is not registered and cannot be deleted."
233
234    query = sqlalchemy.delete(users_tbl).where(users_tbl.c.user_id == user_id)
235
236    result = self.exec(query, debug=debug)
237    if result is None:
238        return False, f"Failed to delete user '{user}'."
239
240    query = sqlalchemy.delete(plugins).where(plugins.c.user_id == user_id)
241    result = self.exec(query, debug=debug)
242    if result is None:
243        return False, f"Failed to delete plugins of user '{user}'."
244
245    return True, f"Successfully deleted user '{user}'."

Delete a user's record from the users table.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
267def get_user_password_hash(
268    self,
269    user: 'mrsm.core.User',
270    debug: bool = False,
271    **kw: Any
272) -> Optional[str]:
273    """
274    Return the password hash for a user.
275    **NOTE**: This may be dangerous and is only allowed if the security settings explicitly allow it.
276    """
277    from meerschaum.utils.debug import dprint
278    from meerschaum.connectors.sql.tables import get_tables
279    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
280    from meerschaum.utils.packages import attempt_import
281    sqlalchemy = attempt_import('sqlalchemy')
282
283    if user.user_id is not None:
284        user_id = user.user_id
285        if debug:
286            dprint(f"Already given user_id: {user_id}")
287    else:
288        if debug:
289            dprint("Fetching user_id...")
290        user_id = self.get_user_id(user, debug=debug)
291
292    if user_id is None:
293        return None
294
295    query = sqlalchemy.select(users_tbl.c.password_hash).where(users_tbl.c.user_id == user_id)
296
297    return self.value(query, debug=debug)

Return the password hash for a user. NOTE: This may be dangerous and is only allowed if the security settings explicitly allow it.
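
For example (the returned hash is abbreviated and illustrative):

>>> conn.get_user_password_hash(User('foo'))
'$2b$12$...'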

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
300def get_user_type(
301    self,
302    user: 'mrsm.core.User',
303    debug: bool = False,
304    **kw: Any
305) -> Optional[str]:
306    """
307    Return the user's type.
308    """
309    from meerschaum.connectors.sql.tables import get_tables
310    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
311    from meerschaum.utils.packages import attempt_import
312    sqlalchemy = attempt_import('sqlalchemy')
313
314    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
315
316    if user_id is None:
317        return None
318
319    query = sqlalchemy.select(users_tbl.c.user_type).where(users_tbl.c.user_id == user_id)
320
321    return self.value(query, debug=debug)

Return the user's type.
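
For example (the type value is whatever was registered; 'admin' is illustrative):

>>> conn.get_user_type(User('foo'))
'admin'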

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[Dict[str, Any]]:
176def get_user_attributes(
177    self,
178    user: 'mrsm.core.User',
179    debug: bool = False
180) -> Union[Dict[str, Any], None]:
181    """
182    Return the user's attributes.
183    """
184    ### ensure users table exists
185    from meerschaum.utils.warnings import warn
186    from meerschaum.utils.packages import attempt_import
187    sqlalchemy = attempt_import('sqlalchemy')
188    from meerschaum.connectors.sql.tables import get_tables
189    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
190
191    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
192
193    query = (
194        sqlalchemy.select(users_tbl.c.attributes)
195        .where(users_tbl.c.user_id == user_id)
196    )
197
198    result = self.value(query, debug=debug)
199    if result is not None and not isinstance(result, dict):
200        try:
201            result = dict(result)
202            _parsed = True
203        except Exception as e:
204            _parsed = False
205        if not _parsed:
206            try:
207                import json
208                result = json.loads(result)
209                _parsed = True
210            except Exception as e:
211                _parsed = False
212        if not _parsed:
213            warn(f"Received unexpected type for attributes: {result}")
214    return result

Return the user's attributes.
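
For example (the attributes are whatever JSON was stored for the user; the value below is illustrative):

>>> conn.get_user_attributes(User('foo'))
{'interests': ['weather']}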

@classmethod
def from_uri( cls, uri: str, label: Optional[str] = None, as_dict: bool = False) -> Union[SQLConnector, Dict[str, Union[str, int]]]:
15@classmethod
16def from_uri(
17        cls,
18        uri: str,
19        label: Optional[str] = None,
20        as_dict: bool = False,
21    ) -> Union[
22        'meerschaum.connectors.SQLConnector',
23        Dict[str, Union[str, int]],
24    ]:
25    """
26    Create a new SQLConnector from a URI string.
27
28    Parameters
29    ----------
30    uri: str
31        The URI connection string.
32
33    label: Optional[str], default None
34        If provided, use this as the connector label.
35        Otherwise use the determined database name.
36
37    as_dict: bool, default False
38        If `True`, return a dictionary of the keyword arguments
39        necessary to create a new `SQLConnector`, otherwise create a new object.
40
41    Returns
42    -------
43    A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`).
44    """
45
46    params = cls.parse_uri(uri)
47    params['uri'] = uri
48    flavor = params.get('flavor', None)
49    if not flavor or flavor not in cls.flavor_configs:
50        error(f"Invalid flavor '{flavor}' detected from the provided URI.")
51
52    if 'database' not in params:
53        error("Unable to determine the database from the provided URI.")
54
55    if flavor in ('sqlite', 'duckdb'):
56        if params['database'] == ':memory:':
57            params['label'] = label or f'memory_{flavor}'
58        else:
59            params['label'] = label or params['database'].split(os.path.sep)[-1].lower()
60    else:
61        params['label'] = label or (
62            (
63                (params['username'] + '@' if 'username' in params else '')
64                + params.get('host', '')
65                + ('/' if 'host' in params else '')
66                + params.get('database', '')
67            ).lower()
68        )
69
70    return cls(**params) if not as_dict else params

Create a new SQLConnector from a URI string.

Parameters
  • uri (str): The URI connection string.
  • label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
  • as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new SQLConnector, otherwise create a new object.
Returns
  • A new SQLConnector object or a dictionary of attributes (if as_dict is True).
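
For example (credentials and paths below are illustrative):

>>> from meerschaum.connectors.sql import SQLConnector
>>> conn = SQLConnector.from_uri('postgresql://user:pass@localhost:5432/db')
>>> conn.label
'user@localhost/db'
>>> SQLConnector.from_uri('sqlite:////tmp/foo.db', as_dict=True)['label']
'foo.db'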
@staticmethod
def parse_uri(uri: str) -> Dict[str, Any]:
 73@staticmethod
 74def parse_uri(uri: str) -> Dict[str, Any]:
 75    """
 76    Parse a URI string into a dictionary of parameters.
 77
 78    Parameters
 79    ----------
 80    uri: str
 81        The database connection URI.
 82
 83    Returns
 84    -------
 85    A dictionary of attributes.
 86
 87    Examples
 88    --------
 89    >>> parse_uri('sqlite:////home/foo/bar.db')
 90    {'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
 91    >>> parse_uri(
 92    ...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
 93    ...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
 94    ... )
 95    {'host': 'localhost', 'database': 'master', 'username': 'sa',
 96    'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
 97    'driver': 'ODBC Driver 17 for SQL Server'}
 98    >>> 
 99    """
100    from urllib.parse import parse_qs, urlparse
101    sqlalchemy = attempt_import('sqlalchemy')
102    parser = sqlalchemy.engine.url.make_url
103    params = parser(uri).translate_connect_args()
104    params['flavor'] = uri.split(':')[0].split('+')[0]
105    if params['flavor'] == 'postgres':
106        params['flavor'] = 'postgresql'
107    if '?' in uri:
108        parsed_uri = urlparse(uri)
109        for key, value in parse_qs(parsed_uri.query).items():
110            params.update({key: value[0]})
111
112        if '--search_path' in params.get('options', ''):
113            params.update({'schema': params['options'].replace('--search_path=', '', 1)})
114    return params

Parse a URI string into a dictionary of parameters.

Parameters
  • uri (str): The database connection URI.
Returns
  • A dictionary of attributes.
Examples
>>> parse_uri('sqlite:////home/foo/bar.db')
{'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
>>> parse_uri(
...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
... )
{'host': 'localhost', 'database': 'master', 'username': 'sa',
'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
'driver': 'ODBC Driver 17 for SQL Server'}
>>>
class APIConnector(meerschaum.connectors.Connector):
 20class APIConnector(Connector):
 21    """
 22    Connect to a Meerschaum API instance.
 23    """
 24
 25    IS_INSTANCE: bool = True
 26    IS_THREAD_SAFE: bool = False
 27
 28    OPTIONAL_ATTRIBUTES: List[str] = ['port']
 29
 30    from ._request import (
 31        make_request,
 32        get,
 33        post,
 34        put,
 35        patch,
 36        delete,
 37        wget,
 38    )
 39    from ._actions import (
 40        get_actions,
 41        do_action,
 42        do_action_async,
 43        do_action_legacy,
 44    )
 45    from ._misc import get_mrsm_version, get_chaining_status
 46    from ._pipes import (
 47        register_pipe,
 48        fetch_pipes_keys,
 49        edit_pipe,
 50        sync_pipe,
 51        delete_pipe,
 52        get_pipe_data,
 53        get_pipe_id,
 54        get_pipe_attributes,
 55        get_sync_time,
 56        pipe_exists,
 57        create_metadata,
 58        get_pipe_rowcount,
 59        drop_pipe,
 60        clear_pipe,
 61        get_pipe_columns_types,
 62        get_pipe_columns_indices,
 63    )
 64    from ._fetch import fetch
 65    from ._plugins import (
 66        register_plugin,
 67        install_plugin,
 68        delete_plugin,
 69        get_plugins,
 70        get_plugin_attributes,
 71    )
 72    from ._login import login, test_connection
 73    from ._users import (
 74        register_user,
 75        get_user_id,
 76        get_users,
 77        edit_user,
 78        delete_user,
 79        get_user_password_hash,
 80        get_user_type,
 81        get_user_attributes,
 82    )
 83    from ._uri import from_uri
 84    from ._jobs import (
 85        get_jobs,
 86        get_job,
 87        get_job_metadata,
 88        get_job_properties,
 89        get_job_exists,
 90        delete_job,
 91        start_job,
 92        create_job,
 93        stop_job,
 94        pause_job,
 95        get_logs,
 96        get_job_stop_time,
 97        monitor_logs,
 98        monitor_logs_async,
 99        get_job_is_blocking_on_stdin,
100        get_job_began,
101        get_job_ended,
102        get_job_paused,
103        get_job_status,
104    )
105
106    def __init__(
107        self,
108        label: Optional[str] = None,
109        wait: bool = False,
110        debug: bool = False,
111        **kw
112    ):
113        if 'uri' in kw:
114            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
115            label = label or from_uri_params.get('label', None)
116            _ = from_uri_params.pop('label', None)
117            kw.update(from_uri_params)
118
119        super().__init__('api', label=label, **kw)
120        if 'protocol' not in self.__dict__:
121            self.protocol = (
122                'https' if self.__dict__.get('uri', '').startswith('https')
123                else 'http'
124            )
125
126        if 'uri' not in self.__dict__:
127            self.verify_attributes(required_attributes)
128        else:
129            from meerschaum.connectors.sql import SQLConnector
130            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
131            if 'host' not in conn_attrs:
132                raise Exception(f"Invalid URI for '{self}'.")
133            self.__dict__.update(conn_attrs)
134
135        self.url = (
136            self.protocol + '://' +
137            self.host
138            + (
139                (':' + str(self.port))
140                if self.__dict__.get('port', None)
141                else ''
142            )
143        )
144        self._token = None
145        self._expires = None
146        self._session = None
147
148
149    @property
150    def URI(self) -> str:
151        """
152        Return the fully qualified URI.
153        """
154        username = self.__dict__.get('username', None)
155        password = self.__dict__.get('password', None)
156        creds = (username + ':' + password + '@') if username and password else ''
157        return (
158            self.protocol
159            + '://'
160            + creds
161            + self.host
162            + (
163                (':' + str(self.port))
164                if self.__dict__.get('port', None)
165                else ''
166            )
167        )
168
169
170    @property
171    def session(self):
172        if self._session is None:
173            certifi = attempt_import('certifi', lazy=False)
174            requests = attempt_import('requests', lazy=False)
175            if requests:
176                self._session = requests.Session()
177            if self._session is None:
178                error(f"Failed to import requests. Is requests installed?")
179        return self._session
180
181    @property
182    def token(self):
183        expired = (
184            True if self._expires is None else (
185                (
186                    self._expires
187                    <
188                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
189                )
190            )
191        )
192
193        if self._token is None or expired:
194            success, msg = self.login()
195            if not success:
196                warn(msg, stack=False)
197        return self._token

Connect to a Meerschaum API instance.
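
A minimal construction sketch (the host, credentials, and label are placeholders):

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('api', 'main')  # load from saved config
>>> from meerschaum.connectors.api import APIConnector
>>> conn = APIConnector(label='example', uri='https://user:pass@api.example.com:8000')
>>> conn.url
'https://api.example.com:8000'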

APIConnector( label: Optional[str] = None, wait: bool = False, debug: bool = False, **kw)
106    def __init__(
107        self,
108        label: Optional[str] = None,
109        wait: bool = False,
110        debug: bool = False,
111        **kw
112    ):
113        if 'uri' in kw:
114            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
115            label = label or from_uri_params.get('label', None)
116            _ = from_uri_params.pop('label', None)
117            kw.update(from_uri_params)
118
119        super().__init__('api', label=label, **kw)
120        if 'protocol' not in self.__dict__:
121            self.protocol = (
122                'https' if self.__dict__.get('uri', '').startswith('https')
123                else 'http'
124            )
125
126        if 'uri' not in self.__dict__:
127            self.verify_attributes(required_attributes)
128        else:
129            from meerschaum.connectors.sql import SQLConnector
130            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
131            if 'host' not in conn_attrs:
132                raise Exception(f"Invalid URI for '{self}'.")
133            self.__dict__.update(conn_attrs)
134
135        self.url = (
136            self.protocol + '://' +
137            self.host
138            + (
139                (':' + str(self.port))
140                if self.__dict__.get('port', None)
141                else ''
142            )
143        )
144        self._token = None
145        self._expires = None
146        self._session = None

Set the given keyword arguments as attributes.

Parameters
  • type (str): The type of the connector (e.g. sql, api, plugin).
  • label (str): The label for the connector.
Examples

Run mrsm edit config to edit connectors in the YAML file:

meerschaum:
    connections:
        {type}:
            {label}:
                ### attributes go here
IS_INSTANCE: bool = True
IS_THREAD_SAFE: bool = False
OPTIONAL_ATTRIBUTES: List[str] = ['port']
url
URI: str
149    @property
150    def URI(self) -> str:
151        """
152        Return the fully qualified URI.
153        """
154        username = self.__dict__.get('username', None)
155        password = self.__dict__.get('password', None)
156        creds = (username + ':' + password + '@') if username and password else ''
157        return (
158            self.protocol
159            + '://'
160            + creds
161            + self.host
162            + (
163                (':' + str(self.port))
164                if self.__dict__.get('port', None)
165                else ''
166            )
167        )

Return the fully qualified URI.

session
170    @property
171    def session(self):
172        if self._session is None:
173            certifi = attempt_import('certifi', lazy=False)
174            requests = attempt_import('requests', lazy=False)
175            if requests:
176                self._session = requests.Session()
177            if self._session is None:
178                error(f"Failed to import requests. Is requests installed?")
179        return self._session
token
181    @property
182    def token(self):
183        expired = (
184            True if self._expires is None else (
185                (
186                    self._expires
187                    <
188                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
189                )
190            )
191        )
192
193        if self._token is None or expired:
194            success, msg = self.login()
195            if not success:
196                warn(msg, stack=False)
197        return self._token
def make_request( self, method: str, r_url: str, headers: Optional[Dict[str, Any]] = None, use_token: bool = True, debug: bool = False, **kwargs: Any) -> requests.models.Response:
 28def make_request(
 29    self,
 30    method: str,
 31    r_url: str,
 32    headers: Optional[Dict[str, Any]] = None,
 33    use_token: bool = True,
 34    debug: bool = False,
 35    **kwargs: Any
 36) -> 'requests.Response':
 37    """
 38    Make a request to this APIConnector's endpoint using the in-memory session.
 39
 40    Parameters
 41    ----------
 42    method: str
 43        The kind of request to make.
 44        Accepted values:
 45        - `'GET'`
 46        - `'OPTIONS'`
 47        - `'HEAD'`
 48        - `'POST'`
 49        - `'PUT'`
 50        - `'PATCH'`
 51        - `'DELETE'`
 52
 53    r_url: str
 54        The relative URL for the endpoint (e.g. `'/pipes'`).
 55
 56    headers: Optional[Dict[str, Any]], default None
 57        The headers to use for the request.
 58        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
 59
 60    use_token: bool, default True
 61        If `True`, add the authorization token to the headers.
 62
 63    debug: bool, default False
 64        Verbosity toggle.
 65
 66    kwargs: Any
 67        All other keyword arguments are passed to `requests.request`.
 68
 69    Returns
 70    -------
 71    A `requests.Response` object.
 72    """
 73    if method.upper() not in METHODS:
 74        raise ValueError(f"Method '{method}' is not supported.")
 75
 76    verify = self.__dict__.get('verify', None)
 77    if 'verify' not in kwargs and isinstance(verify, bool):
 78        kwargs['verify'] = verify
 79
 80    headers = (
 81        copy.deepcopy(headers)
 82        if isinstance(headers, dict)
 83        else {}
 84    )
 85
 86    if use_token:
 87        headers.update({'Authorization': f'Bearer {self.token}'})
 88
 89    if 'timeout' not in kwargs:
 90        kwargs['timeout'] = STATIC_CONFIG['api']['default_timeout']
 91
 92    request_url = urllib.parse.urljoin(self.url, r_url)
 93    if debug:
 94        dprint(f"[{self}] Sending a '{method.upper()}' request to {request_url}")
 95
 96    return self.session.request(
 97        method.upper(),
 98        request_url,
 99        headers = headers,
100        **kwargs
101    )

Make a request to this APIConnector's endpoint using the in-memory session.

Parameters
  • method (str): The kind of request to make. Accepted values:
    • 'GET'
    • 'OPTIONS'
    • 'HEAD'
    • 'POST'
    • 'PUT'
    • 'PATCH'
    • 'DELETE'
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
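
For instance, to query an endpoint directly (assuming the `conn` from above; the endpoint and status are illustrative):

>>> response = conn.make_request('GET', '/pipes')
>>> response.status_code
200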
def get(self, r_url: str, **kwargs: Any) -> requests.models.Response:
104def get(self, r_url: str, **kwargs: Any) -> 'requests.Response':
105    """
106    Wrapper for `requests.get`.
107
108    Parameters
109    ----------
110    r_url: str
111        The relative URL for the endpoint (e.g. `'/pipes'`).
112
113    headers: Optional[Dict[str, Any]], default None
114        The headers to use for the request.
115        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
116
117    use_token: bool, default True
118        If `True`, add the authorization token to the headers.
119
120    debug: bool, default False
121        Verbosity toggle.
122
123    kwargs: Any
124        All other keyword arguments are passed to `requests.request`.
125
126    Returns
127    -------
128    A `requests.Response` object.
129
130    """
131    return self.make_request('GET', r_url, **kwargs)

Wrapper for requests.get.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def post(self, r_url: str, **kwargs: Any) -> requests.models.Response:
134def post(self, r_url: str, **kwargs: Any) -> 'requests.Response':
135    """
136    Wrapper for `requests.post`.
137
138    Parameters
139    ----------
140    r_url: str
141        The relative URL for the endpoint (e.g. `'/pipes'`).
142
143    headers: Optional[Dict[str, Any]], default None
144        The headers to use for the request.
145        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
146
147    use_token: bool, default True
148        If `True`, add the authorization token to the headers.
149
150    debug: bool, default False
151        Verbosity toggle.
152
153    kwargs: Any
154        All other keyword arguments are passed to `requests.request`.
155
156    Returns
157    -------
158    A `requests.Response` object.
159
160    """
161    return self.make_request('POST', r_url, **kwargs)

Wrapper for requests.post.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def put(self, r_url: str, **kwargs: Any) -> requests.models.Response:
193def put(self, r_url: str, **kwargs: Any) -> 'requests.Response':
194    """
195    Wrapper for `requests.put`.
196
197    Parameters
198    ----------
199    r_url: str
200        The relative URL for the endpoint (e.g. `'/pipes'`).
201
202    headers: Optional[Dict[str, Any]], default None
203        The headers to use for the request.
204        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
205
206    use_token: bool, default True
207        If `True`, add the authorization token to the headers.
208
209    debug: bool, default False
210        Verbosity toggle.
211
212    kwargs: Any
213        All other keyword arguments are passed to `requests.request`.
214
215    Returns
216    -------
217    A `requests.Response` object.
218    """
219    return self.make_request('PUT', r_url, **kwargs)

Wrapper for requests.put.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def patch(self, r_url: str, **kwargs: Any) -> requests.models.Response:
164def patch(self, r_url: str, **kwargs: Any) -> 'requests.Response':
165    """
166    Wrapper for `requests.patch`.
167
168    Parameters
169    ----------
170    r_url: str
171        The relative URL for the endpoint (e.g. `'/pipes'`).
172
173    headers: Optional[Dict[str, Any]], default None
174        The headers to use for the request.
175        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
176
177    use_token: bool, default True
178        If `True`, add the authorization token to the headers.
179
180    debug: bool, default False
181        Verbosity toggle.
182
183    kwargs: Any
184        All other keyword arguments are passed to `requests.request`.
185
186    Returns
187    -------
188    A `requests.Response` object.
189    """
190    return self.make_request('PATCH', r_url, **kwargs)

Wrapper for requests.patch.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def delete(self, r_url: str, **kwargs: Any) -> requests.models.Response:
222def delete(self, r_url: str, **kwargs: Any) -> 'requests.Response':
223    """
224    Wrapper for `requests.delete`.
225
226    Parameters
227    ----------
228    r_url: str
229        The relative URL for the endpoint (e.g. `'/pipes'`).
230
231    headers: Optional[Dict[str, Any]], default None
232        The headers to use for the request.
233        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
234
235    use_token: bool, default True
236        If `True`, add the authorization token to the headers.
237
238    debug: bool, default False
239        Verbosity toggle.
240
241    kwargs: Any
242        All other keyword arguments are passed to `requests.request`.
243
244    Returns
245    -------
246    A `requests.Response` object.
247    """
248    return self.make_request('DELETE', r_url, **kwargs)

Wrapper for requests.delete.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def wget( self, r_url: str, dest: Union[str, pathlib.Path, NoneType] = None, headers: Optional[Dict[str, Any]] = None, use_token: bool = True, debug: bool = False, **kw: Any) -> pathlib.Path:
251def wget(
252        self,
253        r_url: str,
254        dest: Optional[Union[str, pathlib.Path]] = None,
255        headers: Optional[Dict[str, Any]] = None,
256        use_token: bool = True,
257        debug: bool = False,
258        **kw: Any
259    ) -> pathlib.Path:
260    """Mimic wget with requests.
261    """
262    from meerschaum.utils.misc import wget
263    if headers is None:
264        headers = {}
265    if use_token:
266        headers.update({'Authorization': f'Bearer {self.token}'})
267    request_url = urllib.parse.urljoin(self.url, r_url)
268    if debug:
269        dprint(
270            f"[{self}] Downloading {request_url}"
271            + (f' to {dest}' if dest is not None else '')
272            + "..."
273        )
274    return wget(request_url, dest=dest, headers=headers, **kw)

Mimic wget with requests.
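
A download sketch (the destination path is hypothetical):

>>> import pathlib
>>> conn.wget('/pipes', dest=pathlib.Path('/tmp/pipes.json'))
PosixPath('/tmp/pipes.json')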

def get_actions(self):
24def get_actions(self):
25    """Get available actions from the API instance."""
26    return self.get(ACTIONS_ENDPOINT)

Get available actions from the API instance.

def do_action(self, sysargs: List[str]) -> Tuple[bool, str]:
29def do_action(self, sysargs: List[str]) -> SuccessTuple:
30    """
31    Execute a Meerschaum action remotely.
32    """
33    return asyncio.run(self.do_action_async(sysargs))

Execute a Meerschaum action remotely.
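
For example (output illustrative):

>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action(['show', 'version'])
(True, "Success")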

async def do_action_async( self, sysargs: List[str], callback_function: Callable[[str], NoneType] = functools.partial(print, end='')) -> Tuple[bool, str]:
36async def do_action_async(
37    self,
38    sysargs: List[str],
39    callback_function: Callable[[str], None] = partial(print, end=''),
40) -> SuccessTuple:
41    """
42    Execute an action as a temporary remote job.
43    """
44    from meerschaum._internal.arguments import remove_api_executor_keys
45    from meerschaum.utils.misc import generate_password
46    sysargs = remove_api_executor_keys(sysargs)
47
48    job_name = TEMP_PREFIX + generate_password(12)
49    job = mrsm.Job(job_name, sysargs, executor_keys=str(self))
50
51    start_success, start_msg = job.start()
52    if not start_success:
53        return start_success, start_msg
54
55    await job.monitor_logs_async(
56        callback_function=callback_function,
57        stop_on_exit=True,
58        strip_timestamps=True,
59    )
60
61    success, msg = job.result
62    job.delete()
63    return success, msg

Execute an action as a temporary remote job.

def do_action_legacy( self, action: Optional[List[str]] = None, sysargs: Optional[List[str]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
 66def do_action_legacy(
 67    self,
 68    action: Optional[List[str]] = None,
 69    sysargs: Optional[List[str]] = None,
 70    debug: bool = False,
 71    **kw
 72) -> SuccessTuple:
 73    """
 74    NOTE: This method is deprecated.
 75    Please use `do_action()` or `do_action_async()`.
 76
 77    Execute a Meerschaum action remotely.
 78
 79    If `sysargs` are provided, parse those instead.
 80    Otherwise infer everything from keyword arguments.
 81
 82    Examples
 83    --------
 84    >>> conn = mrsm.get_connector('api:main')
 85    >>> conn.do_action(['show', 'pipes'])
 86    (True, "Success")
 87    >>> conn.do_action(['show', 'arguments'], name='test')
 88    (True, "Success")
 89    """
 90    import sys, json
 91    from meerschaum.utils.debug import dprint
 92    from meerschaum.config.static import STATIC_CONFIG
 93    from meerschaum.utils.misc import json_serialize_datetime
 94    if action is None:
 95        action = []
 96
 97    if sysargs is not None and action and action[0] == '':
 98        from meerschaum._internal.arguments import parse_arguments
 99        if debug:
100            dprint(f"Parsing sysargs:\n{sysargs}")
101        json_dict = parse_arguments(sysargs)
102    else:
103        json_dict = kw
104        json_dict['action'] = action
105        if 'noask' not in kw:
106            json_dict['noask'] = True
107        if 'yes' not in kw:
108            json_dict['yes'] = True
109        if debug:
110            json_dict['debug'] = debug
111
112    root_action = json_dict['action'][0]
113    del json_dict['action'][0]
114    r_url = f"{STATIC_CONFIG['api']['endpoints']['actions']}/{root_action}"
115    
116    if debug:
117        from meerschaum.utils.formatting import pprint
118        dprint(f"Sending data to '{self.url + r_url}':")
119        pprint(json_dict, stream=sys.stderr)
120
121    response = self.post(
122        r_url,
123        data = json.dumps(json_dict, default=json_serialize_datetime),
124        debug = debug,
125    )
126    try:
127        response_list = json.loads(response.text)
128        if isinstance(response_list, dict) and 'detail' in response_list:
129            return False, response_list['detail']
130    except Exception as e:
131        print(f"Invalid response: {response}")
132        print(e)
133        return False, response.text
134    if debug:
135        dprint(response)
136    try:
137        return response_list[0], response_list[1]
138    except Exception as e:
139        return False, f"Failed to parse result from action '{root_action}'"

NOTE: This method is deprecated. Please use do_action() or do_action_async().

Execute a Meerschaum action remotely.

If sysargs are provided, parse those instead. Otherwise infer everything from keyword arguments.

Examples
>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action(['show', 'pipes'])
(True, "Success")
>>> conn.do_action(['show', 'arguments'], name='test')
(True, "Success")
def get_mrsm_version(self, **kw) -> Optional[str]:
13def get_mrsm_version(self, **kw) -> Optional[str]:
14    """
15    Return the Meerschaum version of the API instance.
16    """
17    from meerschaum.config.static import STATIC_CONFIG
18    try:
19        j = self.get(
20            STATIC_CONFIG['api']['endpoints']['version'] + '/mrsm',
21            use_token=False,
22            **kw
23        ).json()
24    except Exception as e:
25        return None
26    if isinstance(j, dict) and 'detail' in j:
27        return None
28    return j

Return the Meerschaum version of the API instance.
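
For example (the version string depends on the remote instance):

>>> conn.get_mrsm_version()
'2.2.6'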

def get_chaining_status(self, **kw) -> Optional[bool]:
30def get_chaining_status(self, **kw) -> Optional[bool]:
31    """
32    Fetch the chaining status of the API instance.
33    """
34    from meerschaum.config.static import STATIC_CONFIG
35    try:
36        response = self.get(
37            STATIC_CONFIG['api']['endpoints']['chaining'],
38            use_token = True,
39            **kw
40        )
41        if not response:
42            return None
43    except Exception as e:
44        return None
45
46    return response.json()

Fetch the chaining status of the API instance.

def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
34def register_pipe(
35    self,
36    pipe: mrsm.Pipe,
37    debug: bool = False
38) -> SuccessTuple:
39    """Submit a POST to the API to register a new Pipe object.
 40    Returns a tuple of (success_bool, message).
41    """
42    from meerschaum.utils.debug import dprint
43    from meerschaum.config.static import STATIC_CONFIG
44    ### NOTE: if `parameters` is supplied in the Pipe constructor,
45    ###       then `pipe.parameters` will exist and not be fetched from the database.
46    r_url = pipe_r_url(pipe)
47    response = self.post(
48        r_url + '/register',
49        json = pipe.parameters,
50        debug = debug,
51    )
52    if debug:
53        dprint(response.text)
54
55    if not response:
56        return False, response.text
57
58    response_data = response.json()
59    if isinstance(response_data, list):
60        response_tuple = response_data[0], response_data[1]
61    elif 'detail' in response.json():
62        response_tuple = response.__bool__(), response_data['detail']
63    else:
64        response_tuple = response.__bool__(), response.text
65    return response_tuple

Submit a POST to the API to register a new Pipe object. Returns a tuple of (success_bool, message).
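
A registration sketch (the pipe's keys are illustrative):

>>> import meerschaum as mrsm
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance=conn)
>>> conn.register_pipe(pipe)
(True, "Success")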

def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False) -> List[Tuple[str, str, Optional[str]]]:
101def fetch_pipes_keys(
102    self,
103    connector_keys: Optional[List[str]] = None,
104    metric_keys: Optional[List[str]] = None,
105    location_keys: Optional[List[str]] = None,
106    tags: Optional[List[str]] = None,
107    params: Optional[Dict[str, Any]] = None,
108    debug: bool = False
109) -> Union[List[Tuple[str, str, Union[str, None]]]]:
110    """
111    Fetch registered Pipes' keys from the API.
112    
113    Parameters
114    ----------
115    connector_keys: Optional[List[str]], default None
116        The connector keys for the query.
117
118    metric_keys: Optional[List[str]], default None
119        The metric keys for the query.
120
121    location_keys: Optional[List[str]], default None
122        The location keys for the query.
123
124    tags: Optional[List[str]], default None
125        A list of tags for the query.
126
127    params: Optional[Dict[str, Any]], default None
128        A parameters dictionary for filtering against the `pipes` table
129        (e.g. `{'connector_keys': 'plugin:foo'}`).
130        Not recommended to be used.
131
132    debug: bool, default False
133        Verbosity toggle.
134
135    Returns
136    -------
137    A list of tuples containing pipes' keys.
138    """
139    from meerschaum.config.static import STATIC_CONFIG
140    if connector_keys is None:
141        connector_keys = []
142    if metric_keys is None:
143        metric_keys = []
144    if location_keys is None:
145        location_keys = []
146    if tags is None:
147        tags = []
148
149    r_url = STATIC_CONFIG['api']['endpoints']['pipes'] + '/keys'
150    try:
151        j = self.get(
152            r_url,
153            params = {
154                'connector_keys': json.dumps(connector_keys),
155                'metric_keys': json.dumps(metric_keys),
156                'location_keys': json.dumps(location_keys),
157                'tags': json.dumps(tags),
158                'params': json.dumps(params),
159            },
160            debug=debug
161        ).json()
162    except Exception as e:
163        error(str(e))
164
165    if 'detail' in j:
166        error(j['detail'], stack=False)
167    return [tuple(r) for r in j]

Fetch registered Pipes' keys from the API.

Parameters
  • connector_keys (Optional[List[str]], default None): The connector keys for the query.
  • metric_keys (Optional[List[str]], default None): The metric keys for the query.
  • location_keys (Optional[List[str]], default None): The location keys for the query.
  • tags (Optional[List[str]], default None): A list of tags for the query.
  • params (Optional[Dict[str, Any]], default None): A parameters dictionary for filtering against the pipes table (e.g. {'connector_keys': 'plugin:foo'}). Not recommended to be used.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A list of tuples containing pipes' keys.
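
For example (keys and results are illustrative):

>>> conn.fetch_pipes_keys(connector_keys=['plugin:noaa'])
[('plugin:noaa', 'weather', None)]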
def edit_pipe( self, pipe: meerschaum.Pipe, patch: bool = False, debug: bool = False) -> Tuple[bool, str]:
68def edit_pipe(
69    self,
70    pipe: mrsm.Pipe,
71    patch: bool = False,
72    debug: bool = False,
73) -> SuccessTuple:
74    """Submit a PATCH to the API to edit an existing Pipe object.
 75    Returns a tuple of (success_bool, message).
76    """
77    from meerschaum.utils.debug import dprint
78    ### NOTE: if `parameters` is supplied in the Pipe constructor,
79    ###       then `pipe.parameters` will exist and not be fetched from the database.
80    r_url = pipe_r_url(pipe)
81    response = self.patch(
82        r_url + '/edit',
83        params = {'patch': patch,},
84        json = pipe.parameters,
85        debug = debug,
86    )
87    if debug:
88        dprint(response.text)
89
90    response_data = response.json()
91
92    if isinstance(response.json(), list):
93        response_tuple = response_data[0], response_data[1]
94    elif 'detail' in response.json():
95        response_tuple = response.__bool__(), response_data['detail']
96    else:
97        response_tuple = response.__bool__(), response.text
98    return response_tuple

Submit a PATCH to the API to edit an existing Pipe object. Returns a tuple of (success_bool, message).

def sync_pipe( self, pipe: meerschaum.Pipe, df: "Optional[Union['pd.DataFrame', Dict[Any, Any], str]]" = None, chunksize: Optional[int] = -1, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
170def sync_pipe(
171    self,
172    pipe: mrsm.Pipe,
173    df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]] = None,
174    chunksize: Optional[int] = -1,
175    debug: bool = False,
176    **kw: Any
177) -> SuccessTuple:
178    """Sync a DataFrame into a Pipe."""
179    from decimal import Decimal
180    from meerschaum.utils.debug import dprint
181    from meerschaum.utils.misc import json_serialize_datetime, items_str
182    from meerschaum.config import get_config
183    from meerschaum.utils.packages import attempt_import
184    from meerschaum.utils.dataframe import get_numeric_cols, to_json
185    begin = time.time()
186    more_itertools = attempt_import('more_itertools')
187    if df is None:
188        msg = f"DataFrame is `None`. Cannot sync {pipe}."
189        return False, msg
190
191    def get_json_str(c):
192        ### allow syncing dict or JSON without needing to import pandas (for IOT devices)
193        if isinstance(c, (dict, list)):
194            return json.dumps(c, default=json_serialize_datetime)
195        return to_json(c, orient='columns')
196
197    df = json.loads(df) if isinstance(df, str) else df
198
199    _chunksize: Optional[int] = (1 if chunksize is None else (
200        get_config('system', 'connectors', 'sql', 'chunksize') if chunksize == -1
201        else chunksize
202    ))
203    keys: List[str] = list(df.columns)
204    chunks = []
205    if hasattr(df, 'index'):
206        df = df.reset_index(drop=True)
207        is_dask = 'dask' in df.__module__
208        chunks = (
209            (df.iloc[i] for i in more_itertools.chunked(df.index, _chunksize))
210            if not is_dask
211            else [partition.compute() for partition in df.partitions]
212        )
213
214        numeric_cols = get_numeric_cols(df)
215        if numeric_cols:
216            for col in numeric_cols:
217                df[col] = df[col].apply(lambda x: f'{x:f}' if isinstance(x, Decimal) else x)
218            pipe_dtypes = pipe.dtypes
219            new_numeric_cols = [
220                col
221                for col in numeric_cols
222                if pipe_dtypes.get(col, None) != 'numeric'
223            ]
224            pipe.dtypes.update({
225                col: 'numeric'
226                for col in new_numeric_cols
227            })
228            edit_success, edit_msg = pipe.edit(debug=debug)
229            if not edit_success:
230                warn(
231                    "Failed to update new numeric columns "
232                    + f"{items_str(new_numeric_cols)}:\n{edit_msg}"
233                )
234    elif isinstance(df, dict):
235        ### `_chunks` is a dict of lists of dicts.
236        ### e.g. {'a' : [ {'a':[1, 2]}, {'a':[3, 4]} ] }
237        _chunks = {k: [] for k in keys}
238        for k in keys:
239            chunk_iter = more_itertools.chunked(df[k], _chunksize)
240            for l in chunk_iter:
241                _chunks[k].append({k: l})
242
243        ### `chunks` is a list of dicts (e.g. orient by rows in pandas JSON).
244        for k, l in _chunks.items():
245            for i, c in enumerate(l):
246                try:
247                    chunks[i].update(c)
248                except IndexError:
249                    chunks.append(c)
250    elif isinstance(df, list):
251        chunks = (df[i] for i in more_itertools.chunked(df, _chunksize))
252
253    ### Send columns in case the user has defined them locally.
254    if pipe.columns:
255        kw['columns'] = json.dumps(pipe.columns)
256    r_url = pipe_r_url(pipe) + '/data'
257
258    rowcount = 0
259    num_success_chunks = 0
260    for i, c in enumerate(chunks):
261        if debug:
262            dprint(f"[{self}] Posting chunk {i} to {r_url}...")
263        if len(c) == 0:
264            if debug:
265                dprint(f"[{self}] Skipping empty chunk...")
266            continue
267        json_str = get_json_str(c)
268
269        try:
270            response = self.post(
271                r_url,
272                ### handles check_existing
273                params = kw,
274                data = json_str,
275                debug = debug
276            )
277        except Exception as e:
278            msg = f"Failed to post a chunk to {pipe}:\n{e}"
279            warn(msg)
280            return False, msg
281            
282        if not response:
283            return False, f"Failed to sync a chunk:\n{response.text}"
284
285        try:
286            j = json.loads(response.text)
287        except Exception as e:
288            return False, f"Failed to parse response from syncing {pipe}:\n{e}"
289
290        if isinstance(j, dict) and 'detail' in j:
291            return False, j['detail']
292
293        try:
294            j = tuple(j)
295        except Exception as e:
296            return False, response.text
297
298        if debug:
299            dprint("Received response: " + str(j))
300        if not j[0]:
301            return j
302
303        rowcount += len(c)
304        num_success_chunks += 1
305
306    success_tuple = True, (
307        f"It took {round(time.time() - begin, 2)} seconds to sync {rowcount} row"
308        + ('s' if rowcount != 1 else '')
309        + f" across {num_success_chunks} chunk" + ('s' if num_success_chunks != 1 else '') +
310        f" to {pipe}."
311    )
312    return success_tuple

Sync a DataFrame into a Pipe.
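
A sketch of syncing a small DataFrame (values illustrative; dict and JSON payloads are also accepted):

>>> import pandas as pd
>>> df = pd.DataFrame({'dt': ['2024-01-01 00:00:00'], 'val': [1.5]})
>>> success, msg = conn.sync_pipe(pipe, df)
>>> success
True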

def delete_pipe( self, pipe: Optional[meerschaum.Pipe] = None, debug: bool = None) -> Tuple[bool, str]:
315def delete_pipe(
316    self,
317    pipe: Optional[meerschaum.Pipe] = None,
318    debug: bool = None,        
319) -> SuccessTuple:
320    """Delete a Pipe and drop its table."""
321    if pipe is None:
322        error(f"Pipe cannot be None.")
323    r_url = pipe_r_url(pipe)
324    response = self.delete(
325        r_url + '/delete',
326        debug = debug,
327    )
328    if debug:
329        dprint(response.text)
330
331    response_data = response.json()
332    if isinstance(response.json(), list):
333        response_tuple = response_data[0], response_data[1]
334    elif 'detail' in response.json():
335        response_tuple = response.__bool__(), response_data['detail']
336    else:
337        response_tuple = response.__bool__(), response.text
338    return response_tuple

Delete a Pipe and drop its table.

def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, as_chunks: bool = False, debug: bool = False, **kw: Any) -> Optional[pandas.core.frame.DataFrame]:
341def get_pipe_data(
342    self,
343    pipe: meerschaum.Pipe,
344    select_columns: Optional[List[str]] = None,
345    omit_columns: Optional[List[str]] = None,
346    begin: Union[str, datetime, int, None] = None,
347    end: Union[str, datetime, int, None] = None,
348    params: Optional[Dict[str, Any]] = None,
349    as_chunks: bool = False,
350    debug: bool = False,
351    **kw: Any
352) -> Union[pandas.DataFrame, None]:
353    """Fetch data from the API."""
354    r_url = pipe_r_url(pipe)
355    chunks_list = []
356    while True:
357        try:
358            response = self.get(
359                r_url + "/data",
360                params={
361                    'select_columns': json.dumps(select_columns),
362                    'omit_columns': json.dumps(omit_columns),
363                    'begin': begin,
364                    'end': end,
365                    'params': json.dumps(params, default=str)
366                },
367                debug=debug
368            )
369            if not response.ok:
370                return None
371            j = response.json()
372        except Exception as e:
373            warn(f"Failed to get data for {pipe}:\n{e}")
374            return None
375        if isinstance(j, dict) and 'detail' in j:
376            return None  # a 'detail' key indicates an API error; honor the Optional[DataFrame] return type
377        break
378
379    from meerschaum.utils.packages import import_pandas
380    from meerschaum.utils.dataframe import parse_df_datetimes, add_missing_cols_to_df
381    from meerschaum.utils.dtypes import are_dtypes_equal
382    pd = import_pandas()
383    try:
384        df = pd.read_json(StringIO(response.text))
385    except Exception as e:
386        warn(f"Failed to parse response for {pipe}:\n{e}")
387        return None
388
389    if len(df.columns) == 0:
390        return add_missing_cols_to_df(df, pipe.dtypes)
391
392    df = parse_df_datetimes(
393        df,
394        ignore_cols = [
395            col
396            for col, dtype in pipe.dtypes.items()
397            if not are_dtypes_equal(str(dtype), 'datetime')
398        ],
399        strip_timezone=(pipe.tzinfo is None),
400        debug=debug,
401    )
402    return df

Fetch data from the API.
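
For example (bounds and columns are illustrative):

>>> df = conn.get_pipe_data(pipe, begin='2024-01-01', end='2024-02-01')
>>> list(df.columns)
['dt', 'id', 'val']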

def get_pipe_id(self, pipe: meerschaum.Pipe, debug: bool = False) -> int:
405def get_pipe_id(
406    self,
407    pipe: mrsm.Pipe,
408    debug: bool = False,
409) -> int:
410    """Get a Pipe's ID from the API."""
411    from meerschaum.utils.misc import is_int
412    r_url = pipe_r_url(pipe)
413    response = self.get(
414        r_url + '/id',
415        debug = debug
416    )
417    if debug:
418        dprint(f"Got pipe ID: {response.text}")
419    try:
420        if is_int(response.text):
421            return int(response.text)
422    except Exception as e:
423        warn(f"Failed to get the ID for {pipe}:\n{e}")
424    return None

Get a Pipe's ID from the API.

def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, Any]:
427def get_pipe_attributes(
428    self,
429    pipe: mrsm.Pipe,
430    debug: bool = False,
431) -> Dict[str, Any]:
432    """Get a Pipe's attributes from the API
433
434    Parameters
435    ----------
436    pipe: meerschaum.Pipe
437        The pipe whose attributes we are fetching.
438        
439    Returns
440    -------
441    A dictionary of a pipe's attributes.
442    If the pipe does not exist, return an empty dictionary.
443    """
444    r_url = pipe_r_url(pipe)
445    response = self.get(r_url + '/attributes', debug=debug)
446    try:
447        return json.loads(response.text)
448    except Exception as e:
449        warn(f"Failed to get the attributes for {pipe}:\n{e}")
450    return {}

Get a Pipe's attributes from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose attributes we are fetching.
Returns
  • A dictionary of a pipe's attributes.
  • If the pipe does not exist, return an empty dictionary.
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, debug: bool = False) -> Union[datetime.datetime, int, NoneType]:
453def get_sync_time(
454    self,
455    pipe: mrsm.Pipe,
456    params: Optional[Dict[str, Any]] = None,
457    newest: bool = True,
458    debug: bool = False,
459) -> Union[datetime, int, None]:
460    """Get a Pipe's most recent datetime value from the API.
461
462    Parameters
463    ----------
464    pipe: meerschaum.Pipe
465        The pipe to select from.
466
467    params: Optional[Dict[str, Any]], default None
468        Optional params dictionary to build the WHERE clause.
469
470    newest: bool, default True
471        If `True`, get the most recent datetime (honoring `params`).
472        If `False`, get the oldest datetime (ASC instead of DESC).
473
474    Returns
475    -------
476    The most recent (or oldest if `newest` is `False`) datetime of a pipe,
477    rounded down to the closest minute.
478    """
479    from meerschaum.utils.misc import is_int
480    from meerschaum.utils.warnings import warn
481    r_url = pipe_r_url(pipe)
482    response = self.get(
483        r_url + '/sync_time',
484        json = params,
485        params = {'newest': newest, 'debug': debug},
486        debug = debug,
487    )
488    if not response:
489        warn(f"Failed to get the sync time for {pipe}:\n" + response.text)
490        return None
491
492    j = response.json()
493    if j is None:
494        dt = None
495    else:
496        try:
497            dt = (
498                datetime.fromisoformat(j)
499                if not is_int(j)
500                else int(j)
501            )
502        except Exception as e:
503            warn(f"Failed to parse the sync time '{j}' for {pipe}:\n{e}")
504            dt = None
505    return dt

Get a Pipe's most recent datetime value from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe to select from.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause.
  • newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
Returns
  • The most recent (or oldest if newest is False) datetime of a pipe, rounded down to the closest minute.
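
For example (the returned value is illustrative):

>>> conn.get_sync_time(pipe)
datetime.datetime(2024, 1, 31, 23, 59)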
def pipe_exists(self, pipe: meerschaum.Pipe, debug: bool = False) -> bool:
508def pipe_exists(
509    self,
510    pipe: mrsm.Pipe,
511    debug: bool = False
512) -> bool:
513    """Check the API to see if a Pipe exists.
514
515    Parameters
516    ----------
517    pipe: 'meerschaum.Pipe'
518        The pipe which we are querying.
519        
520    Returns
521    -------
522    A bool indicating whether a pipe's underlying table exists.
523    """
524    from meerschaum.utils.debug import dprint
525    from meerschaum.utils.warnings import warn
526    r_url = pipe_r_url(pipe)
527    response = self.get(r_url + '/exists', debug=debug)
528    if not response:
529        warn(f"Failed to check if {pipe} exists:\n{response.text}")
530        return False
531    if debug:
532        dprint("Received response: " + str(response.text))
533    j = response.json()
534    if isinstance(j, dict) and 'detail' in j:
535        warn(j['detail'])
536    return j

Check the API to see if a Pipe exists.

Parameters
  • pipe (meerschaum.Pipe): The pipe which we are querying.
Returns
  • A bool indicating whether a pipe's underlying table exists.
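
For example:

>>> conn.pipe_exists(pipe)
True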
def create_metadata(self, debug: bool = False) -> bool:
539def create_metadata(
540    self,
541    debug: bool = False
542) -> bool:
543    """Create metadata tables.
544
545    Returns
546    -------
547    A bool indicating success.
548    """
549    from meerschaum.utils.debug import dprint
550    from meerschaum.config.static import STATIC_CONFIG
551    r_url = STATIC_CONFIG['api']['endpoints']['metadata']
552    response = self.post(r_url, debug=debug)
553    if debug:
554        dprint("Create metadata response: {response.text}")
555    try:
556        metadata_response = json.loads(response.text)
557    except Exception as e:
558        warn(f"Failed to create metadata on {self}:\n{e}")
559        metadata_response = False
560    return metadata_response

Create metadata tables.

Returns
  • A bool indicating success.
def get_pipe_rowcount( self, pipe: meerschaum.Pipe, begin: Optional[datetime.datetime] = None, end: Optional[datetime.datetime] = None, params: Optional[Dict[str, Any]] = None, remote: bool = False, debug: bool = False) -> int:
563def get_pipe_rowcount(
564    self,
565    pipe: mrsm.Pipe,
566    begin: Optional[datetime] = None,
567    end: Optional[datetime] = None,
568    params: Optional[Dict[str, Any]] = None,
569    remote: bool = False,
570    debug: bool = False,
571) -> int:
572    """Get a pipe's row count from the API.
573
574    Parameters
575    ----------
576    pipe: 'meerschaum.Pipe'
577        The pipe whose row count we are counting.
578        
579    begin: Optional[datetime], default None
580        If provided, bound the count by this datetime.
581
582    end: Optional[datetime]
583        If provided, bound the count by this datetime.
584
585    params: Optional[Dict[str, Any]], default None
586        If provided, bound the count by these parameters.
587
588    remote: bool, default False
589
590    Returns
591    -------
592    The number of rows in the pipe's table, bound by the given parameters.
593    If the table does not exist, return 0.
594    """
595    r_url = pipe_r_url(pipe)
596    response = self.get(
597        r_url + "/rowcount",
598        json = params,
599        params = {
600            'begin': begin,
601            'end': end,
602            'remote': remote,
603        },
604        debug = debug
605    )
606    if not response:
607        warn(f"Failed to get the rowcount for {pipe}:\n{response.text}")
608        return 0
609    try:
610        return int(json.loads(response.text))
611    except Exception as e:
612        warn(f"Failed to get the rowcount for {pipe}:\n{e}")
613    return 0

Get a pipe's row count from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose row count we are counting.
  • begin (Optional[datetime], default None): If provided, bound the count by this datetime.
  • end (Optional[datetime]): If provided, bound the count by this datetime.
  • params (Optional[Dict[str, Any]], default None): If provided, bound the count by these parameters.
  • remote (bool, default False):
Returns
  • The number of rows in the pipe's table, bound by the given parameters.
  • If the table does not exist, return 0.
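
For example (the count is illustrative):

>>> conn.get_pipe_rowcount(pipe, begin='2024-01-01')
744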
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
616def drop_pipe(
617    self,
618    pipe: mrsm.Pipe,
619    debug: bool = False
620) -> SuccessTuple:
621    """
622    Drop a pipe's table but maintain its registration.
623
624    Parameters
625    ----------
626    pipe: meerschaum.Pipe
627        The pipe to be dropped.
628        
629    Returns
630    -------
631    A success tuple (bool, str).
632    """
633    from meerschaum.utils.warnings import error
634    from meerschaum.utils.debug import dprint
635    if pipe is None:
636        error(f"Pipe cannot be None.")
637    r_url = pipe_r_url(pipe)
638    response = self.delete(
639        r_url + '/drop',
640        debug = debug,
641    )
642    if debug:
643        dprint(response.text)
644
645    try:
646        data = response.json()
647    except Exception as e:
648        return False, f"Failed to drop {pipe}."
649
650    if isinstance(data, list):
651        response_tuple = data[0], data[1]
652    elif 'detail' in data:
653        response_tuple = response.__bool__(), data['detail']
654    else:
655        response_tuple = response.__bool__(), response.text
656
657    return response_tuple

Drop a pipe's table but maintain its registration.

Parameters
  • pipe (meerschaum.Pipe): The pipe to be dropped.
Returns
  • A success tuple (bool, str).
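
A hedged sketch, continuing with `conn` and `pipe` from above; dropping removes the table but keeps the registration:

>>> success, msg = conn.drop_pipe(pipe)
>>> success
True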
def clear_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kw) -> Tuple[bool, str]:
660def clear_pipe(
661    self,
662    pipe: mrsm.Pipe,
663    debug: bool = False,
664    **kw
665) -> SuccessTuple:
666    """
667    Delete rows in a pipe's table.
668
669    Parameters
670    ----------
671    pipe: meerschaum.Pipe
672        The pipe with rows to be deleted.
673        
674    Returns
675    -------
676    A success tuple.
677    """
678    kw.pop('metric_keys', None)
679    kw.pop('connector_keys', None)
680    kw.pop('location_keys', None)
681    kw.pop('action', None)
682    kw.pop('force', None)
683    return self.do_action_legacy(
684        ['clear', 'pipes'],
685        connector_keys=pipe.connector_keys,
686        metric_keys=pipe.metric_key,
687        location_keys=pipe.location_key,
688        force=True,
689        debug=debug,
690        **kw
691    )

Delete rows in a pipe's table.

Parameters
  • pipe (meerschaum.Pipe): The pipe with rows to be deleted.
Returns
  • A success tuple.
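
A hedged sketch, continuing with `conn` and `pipe` from above; keyword arguments (e.g. `begin`) are assumed to be forwarded to the underlying `clear pipes` action:

>>> from datetime import datetime
>>> success, msg = conn.clear_pipe(pipe, begin=datetime(2024, 1, 1))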
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False) -> Optional[Dict[str, str]]:
694def get_pipe_columns_types(
695    self,
696    pipe: mrsm.Pipe,
697    debug: bool = False,
698) -> Union[Dict[str, str], None]:
699    """
700    Fetch the columns and types of the pipe's table.
701
702    Parameters
703    ----------
704    pipe: meerschaum.Pipe
705        The pipe whose columns are to be queried.
706
707    Returns
708    -------
709    A dictionary mapping column names to their database types.
710
711    Examples
712    --------
713    >>> {
714    ...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
715    ...   'id': 'BIGINT',
716    ...   'val': 'DOUBLE PRECISION',
717    ... }
718    >>>
719    """
720    r_url = pipe_r_url(pipe) + '/columns/types'
721    response = self.get(
722        r_url,
723        debug=debug
724    )
725    j = response.json()
726    if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1:
727        warn(j['detail'])
728        return None
729    if not isinstance(j, dict):
730        warn(response.text)
731        return None
732    return j

Fetch the columns and types of the pipe's table.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose columns are to be queried.
Returns
  • A dictionary mapping column names to their database types.
Examples
>>> {
...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
...   'id': 'BIGINT',
...   'val': 'DOUBLE PRECISION',
... }
>>>
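
A short usage sketch, continuing with `conn` and `pipe` from above (the output mirrors the example and is illustrative):

>>> conn.get_pipe_columns_types(pipe)
{'dt': 'TIMESTAMP WITHOUT TIMEZONE', 'id': 'BIGINT', 'val': 'DOUBLE PRECISION'}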
def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Optional[Dict[str, str]]:
735def get_pipe_columns_indices(
736    self,
737    pipe: mrsm.Pipe,
738    debug: bool = False,
739) -> Union[Dict[str, str], None]:
740    """
741    Fetch the index information for a pipe.
742
743    Parameters
744    ----------
745    pipe: mrsm.Pipe
746        The pipe whose columns are to be queried.
747
748    Returns
749    -------
750    A dictionary mapping column names to a list of associated index information.
751    """
752    r_url = pipe_r_url(pipe) + '/columns/indices'
753    response = self.get(
754        r_url,
755        debug=debug
756    )
757    j = response.json()
758    if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1:
759        warn(j['detail'])
760        return None
761    if not isinstance(j, dict):
762        warn(response.text)
763        return None
764    return j

Fetch the index information for a pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe whose columns are to be queried.
Returns
  • A dictionary mapping column names to a list of associated index information.
def fetch( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, str, int] = '', end: Union[datetime.datetime, int] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw: Any) -> Iterator[pandas.core.frame.DataFrame]:
16def fetch(
17        self,
18        pipe: mrsm.Pipe,
19        begin: Union[datetime, str, int] = '',
20        end: Union[datetime, int] = None,
21        params: Optional[Dict[str, Any]] = None,
22        debug: bool = False,
23        **kw: Any
24    ) -> Iterator['pd.DataFrame']:
25    """Get the Pipe data from the remote Pipe."""
26    from meerschaum.utils.debug import dprint
27    from meerschaum.utils.warnings import warn, error
28    from meerschaum.config._patch import apply_patch_to_config
29
30    fetch_params = pipe.parameters.get('fetch', {})
31    if not fetch_params:
32        warn(f"Missing 'fetch' parameters for {pipe}.", stack=False)
33        return None
34
35    pipe_meta = fetch_params.get('pipe', {})
36    ### Legacy: check for `connector_keys`, etc. at the root.
37    if not pipe_meta:
38        ck, mk, lk = (
39            fetch_params.get('connector_keys', None),
40            fetch_params.get('metric_key', None),
41            fetch_params.get('location_key', None),
42        )
43        if not ck or not mk:
44            warn(f"Missing `fetch:pipe` keys for {pipe}.", stack=False)
45            return None
46
47        pipe_meta.update({
48            'connector': ck,
49            'metric': mk,
50            'location': lk,
51        })
52
53    pipe_meta['instance'] = self
54    source_pipe = mrsm.Pipe(**pipe_meta)
55
56    _params = copy.deepcopy(params) if params is not None else {}
57    _params = apply_patch_to_config(_params, fetch_params.get('params', {}))
58    select_columns = fetch_params.get('select_columns', [])
59    omit_columns = fetch_params.get('omit_columns', [])
60
61    return source_pipe.get_data(
62        select_columns = select_columns,
63        omit_columns = omit_columns,
64        begin = begin,
65        end = end,
66        params = _params,
67        debug = debug,
68        as_iterator = True,
69    )

Get the Pipe data from the remote Pipe.
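
As a sketch of the expected `fetch:pipe` parameters shape (derived from the logic above; all keys and values are illustrative), a pipe which fetches from another pipe on this instance might be defined like so:

>>> import meerschaum as mrsm
>>> dest = mrsm.Pipe(
...     'api:main', 'weather',
...     parameters={
...         'fetch': {
...             'pipe': {
...                 'connector': 'plugin:noaa',
...                 'metric': 'weather',
...             },
...         },
...     },
... )
>>> chunks = conn.fetch(dest)  ### iterator of DataFrames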

def register_plugin( self, plugin: meerschaum.Plugin, make_archive: bool = True, debug: bool = False) -> Tuple[bool, str]:
20def register_plugin(
21        self,
22        plugin: meerschaum.core.Plugin,
23        make_archive: bool = True,
24        debug: bool = False,
25    ) -> SuccessTuple:
26    """Register a plugin and upload its archive."""
27    import json
28    archive_path = plugin.make_tar(debug=debug) if make_archive else plugin.archive_path
29    file_pointer = open(archive_path, 'rb')
30    files = {'archive': file_pointer}
31    metadata = {
32        'version': plugin.version,
33        'attributes': json.dumps(plugin.attributes),
34    }
35    r_url = plugin_r_url(plugin)
36    try:
37        response = self.post(r_url, files=files, params=metadata, debug=debug)
38    except Exception as e:
39        return False, f"Failed to register plugin '{plugin}'."
40    finally:
41        file_pointer.close()
42
43    try:
44        success, msg = json.loads(response.text)
45    except Exception as e:
46        return False, response.text
47
48    return success, msg

Register a plugin and upload its archive.
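
A hedged sketch, assuming a plugin named 'example' exists locally and `conn` points to a repository:

>>> from meerschaum.core import Plugin
>>> plugin = Plugin('example')
>>> success, msg = conn.register_plugin(plugin)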

def install_plugin( self, name: str, skip_deps: bool = False, force: bool = False, debug: bool = False) -> Tuple[bool, str]:
50def install_plugin(
51        self,
52        name: str,
53        skip_deps: bool = False,
54        force: bool = False,
55        debug: bool = False
56    ) -> SuccessTuple:
57    """Download and attempt to install a plugin from the API."""
58    import os, pathlib, json
59    from meerschaum.core import Plugin
60    from meerschaum.config._paths import PLUGINS_TEMP_RESOURCES_PATH
61    from meerschaum.utils.debug import dprint
62    from meerschaum.utils.packages import attempt_import
63    binaryornot_check = attempt_import('binaryornot.check', lazy=False)
64    r_url = plugin_r_url(name)
65    dest = pathlib.Path(os.path.join(PLUGINS_TEMP_RESOURCES_PATH, name + '.tar.gz'))
66    if debug:
67        dprint(f"Fetching from '{self.url + r_url}' to '{dest}'...")
68    archive_path = self.wget(r_url, dest, debug=debug) 
69    is_binary = binaryornot_check.is_binary(str(archive_path))
70    if not is_binary:
71        fail_msg = f"Failed to download binary for plugin '{name}'."
72        try:
73            with open(archive_path, 'r') as f:
74                j = json.load(f)
75            if isinstance(j, list):
76                success, msg = tuple(j)
77            else:
78                success, msg = False, fail_msg
79        except Exception as e:
80            success, msg = False, fail_msg
81        return success, msg
82    plugin = Plugin(name, archive_path=archive_path, repo_connector=self)
83    return plugin.install(skip_deps=skip_deps, force=force, debug=debug)

Download and attempt to install a plugin from the API.
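
A hedged usage sketch (the plugin name is hypothetical):

>>> success, msg = conn.install_plugin('example', skip_deps=False, force=True)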

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
149def delete_plugin(
150        self,
151        plugin: meerschaum.core.Plugin,
152        debug: bool = False
153    ) -> SuccessTuple:
154    """Delete a plugin from an API repository."""
155    import json
156    r_url = plugin_r_url(plugin)
157    try:
158        response = self.delete(r_url, debug=debug)
159    except Exception as e:
160        return False, f"Failed to delete plugin '{plugin}'."
161
162    try:
163        success, msg = json.loads(response.text)
164    except Exception as e:
165        return False, response.text
166
167    return success, msg

Delete a plugin from an API repository.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False) -> Sequence[str]:
 85def get_plugins(
 86        self,
 87        user_id: Optional[int] = None,
 88        search_term: Optional[str] = None,
 89        debug: bool = False
 90    ) -> Sequence[str]:
 91    """Return a list of registered plugin names.
 92
 93    Parameters
 94    ----------
 95    user_id :
 96        If specified, return all plugins from a certain user.
 97    user_id : Optional[int] :
 98         (Default value = None)
 99    search_term : Optional[str] :
100         (Default value = None)
101    debug : bool :
102         (Default value = False)
103
104    Returns
105    -------
106
107    """
108    import json
109    from meerschaum.utils.warnings import warn, error
110    from meerschaum.config.static import STATIC_CONFIG
111    response = self.get(
112        STATIC_CONFIG['api']['endpoints']['plugins'],
113        params = {'user_id' : user_id, 'search_term' : search_term},
114        use_token = True,
115        debug = debug
116    )
117    if not response:
118        return []
119    plugins = json.loads(response.text)
120    if not isinstance(plugins, list):
121        error(response.text)
122    return plugins

Return a list of registered plugin names.

Parameters
  • user_id (Optional[int], default None): If specified, return all plugins from a certain user.
  • search_term (Optional[str], default None): If specified, filter plugins by the search term.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A list of plugin names.
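
A hedged sketch (the search term and output are illustrative):

>>> conn.get_plugins(search_term='noa')
['noaa']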
def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Mapping[str, Any]:
124def get_plugin_attributes(
125        self,
126        plugin: meerschaum.core.Plugin,
127        debug: bool = False
128    ) -> Mapping[str, Any]:
129    """
130    Return a plugin's attributes.
131    """
132    import json
133    from meerschaum.utils.warnings import warn, error
134    r_url = plugin_r_url(plugin) + '/attributes'
135    response = self.get(r_url, use_token=True, debug=debug)
136    attributes = response.json()
137    if isinstance(attributes, str) and attributes and attributes[0] == '{':
138        try:
139            attributes = json.loads(attributes)
140        except Exception as e:
141            pass
142    if not isinstance(attributes, dict):
143        error(response.text)
144    elif not response and 'detail' in attributes:
145        warn(attributes['detail'])
146        return {}
147    return attributes

Return a plugin's attributes.

def login( self, debug: bool = False, warn: bool = True, **kw: Any) -> Tuple[bool, str]:
13def login(
14        self,
15        debug: bool = False,
16        warn: bool = True,
17        **kw: Any
18    ) -> SuccessTuple:
19    """Log in and set the session token."""
20    from meerschaum.utils.warnings import warn as _warn, info, error
21    from meerschaum.core import User
22    from meerschaum.config.static import STATIC_CONFIG
23    import json, datetime
24    try:
25        login_data = {
26            'username': self.username,
27            'password': self.password,
28        }
29    except AttributeError:
30        return False, f"Please login with the command `login {self}`."
31    response = self.post(
32        STATIC_CONFIG['api']['endpoints']['login'],
33        data = login_data,
34        use_token = False,
35        debug = debug
36    )
37    if response:
38        msg = f"Successfully logged into '{self}' as user '{login_data['username']}'."
39        self._token = json.loads(response.text)['access_token']
40        self._expires = datetime.datetime.strptime(
41            json.loads(response.text)['expires'], 
42            '%Y-%m-%dT%H:%M:%S.%f'
43        )
44    else:
45        msg = (
46            f"Failed to log into '{self}' as user '{login_data['username']}'.\n" +
47            f"    Please verify login details for connector '{self}'."
48        )
49        if warn:
50            _warn(msg, stack=False)
51
52    return response.__bool__(), msg

Log in and set the session token.
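
A hedged sketch, assuming the connector was created with `username` and `password` attributes (values are hypothetical):

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('api', 'remote', host='localhost', username='user', password='pass')
>>> success, msg = conn.login()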

def test_connection(self, **kw: Any) -> Optional[bool]:
55def test_connection(
56        self,
57        **kw: Any
58    ) -> Union[bool, None]:
59    """Test if a successful connection to the API may be made."""
60    from meerschaum.connectors.poll import retry_connect
61    _default_kw = {
62        'max_retries': 1, 'retry_wait': 0, 'warn': False,
63        'connector': self, 'enforce_chaining': False,
64        'enforce_login': False,
65    }
66    _default_kw.update(kw)
67    try:
68        return retry_connect(**_default_kw)
69    except Exception as e:
70        return False

Test if a successful connection to the API may be made.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
65def register_user(
66        self,
67        user: 'meerschaum.core.User',
68        debug: bool = False,
69        **kw: Any
70    ) -> SuccessTuple:
71    """Register a new user."""
72    import json
73    from meerschaum.config.static import STATIC_CONFIG
74    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/register"
75    data = {
76        'username': user.username,
77        'password': user.password,
78        'attributes': json.dumps(user.attributes),
79    }
80    if user.type:
81        data['type'] = user.type
82    if user.email:
83        data['email'] = user.email
84    response = self.post(r_url, data=data, debug=debug)
85    try:
86        _json = json.loads(response.text)
87        if isinstance(_json, dict) and 'detail' in _json:
88            return False, _json['detail']
89        success_tuple = tuple(_json)
90    except Exception:
91        msg = response.text if response else f"Failed to register user '{user}'."
92        return False, msg
93
94    return success_tuple

Register a new user.
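
A hedged sketch (the credentials are hypothetical):

>>> from meerschaum.core import User
>>> user = User('newuser', 'changeme')
>>> success, msg = conn.register_user(user)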

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[int]:
 97def get_user_id(
 98        self,
 99        user: 'meerschaum.core.User',
100        debug: bool = False,
101        **kw: Any
102    ) -> Optional[int]:
103    """Get a user's ID."""
104    from meerschaum.config.static import STATIC_CONFIG
105    import json
106    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/id"
107    response = self.get(r_url, debug=debug, **kw)
108    try:
109        user_id = int(json.loads(response.text))
110    except Exception as e:
111        user_id = None
112    return user_id

Get a user's ID.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
13def get_users(
14        self,
15        debug: bool = False,
16        **kw : Any
17    ) -> List[str]:
18    """
19    Return a list of registered usernames.
20    """
21    from meerschaum.config.static import STATIC_CONFIG
22    import json
23    response = self.get(
24        f"{STATIC_CONFIG['api']['endpoints']['users']}",
25        debug = debug,
26        use_token = True,
27    )
28    if not response:
29        return []
30    try:
31        return response.json()
32    except Exception as e:
33        return []

Return a list of registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
35def edit_user(
36        self,
37        user: 'meerschaum.core.User',
38        debug: bool = False,
39        **kw: Any
40    ) -> SuccessTuple:
41    """Edit an existing user."""
42    import json
43    from meerschaum.config.static import STATIC_CONFIG
44    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/edit"
45    data = {
46        'username': user.username,
47        'password': user.password,
48        'type': user.type,
49        'email': user.email,
50        'attributes': json.dumps(user.attributes),
51    }
52    response = self.post(r_url, data=data, debug=debug)
53    try:
54        _json = json.loads(response.text)
55        if isinstance(_json, dict) and 'detail' in _json:
56            return False, _json['detail']
57        success_tuple = tuple(_json)
58    except Exception as e:
59        msg = response.text if response else f"Failed to edit user '{user}'."
60        return False, msg
61
62    return success_tuple

Edit an existing user.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
114def delete_user(
115        self,
116        user: 'meerschaum.core.User',
117        debug: bool = False,
118        **kw: Any
119    ) -> SuccessTuple:
120    """Delete a user."""
121    from meerschaum.config.static import STATIC_CONFIG
122    import json
123    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}"
124    response = self.delete(r_url, debug=debug)
125    try:
126        _json = json.loads(response.text)
127        if isinstance(_json, dict) and 'detail' in _json:
128            return False, _json['detail']
129        success_tuple = tuple(_json)
130    except Exception as e:
131        success_tuple = False, f"Failed to delete user '{user.username}'."
132    return success_tuple

Delete a user.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
155def get_user_password_hash(
156        self,
157        user: 'meerschaum.core.User',
158        debug: bool = False,
159        **kw: Any
160    ) -> Optional[str]:
161    """If configured, get a user's password hash."""
162    from meerschaum.config.static import STATIC_CONFIG
163    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/password_hash'
164    response = self.get(r_url, debug=debug, **kw)
165    if not response:
166        return None
167    return response.json()

If configured, get a user's password hash.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
169def get_user_type(
170        self,
171        user: 'meerschaum.core.User',
172        debug: bool = False,
173        **kw: Any
174    ) -> Optional[str]:
175    """If configured, get a user's type."""
176    from meerschaum.config.static import STATIC_CONFIG
177    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/type'
178    response = self.get(r_url, debug=debug, **kw)
179    if not response:
180        return None
181    return response.json()

If configured, get a user's type.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw) -> int:
134def get_user_attributes(
135        self,
136        user: 'meerschaum.core.User',
137        debug: bool = False,
138        **kw
139    ) -> int:
140    """Get a user's attributes."""
141    from meerschaum.config.static import STATIC_CONFIG
142    import json
143    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/attributes"
144    response = self.get(r_url, debug=debug, **kw)
145    try:
146        attributes = json.loads(response.text)
147    except Exception as e:
148        attributes = None
149    return attributes

Get a user's attributes.

@classmethod
def from_uri( cls, uri: str, label: Optional[str] = None, as_dict: bool = False) -> Union[APIConnector, Dict[str, Union[str, int]]]:
13@classmethod
14def from_uri(
15    cls,
16    uri: str,
17    label: Optional[str] = None,
18    as_dict: bool = False,
19) -> Union[
20        'meerschaum.connectors.APIConnector',
21        Dict[str, Union[str, int]],
22    ]:
23    """
24    Create a new APIConnector from a URI string.
25
26    Parameters
27    ----------
28    uri: str
29        The URI connection string.
30
31    label: Optional[str], default None
32        If provided, use this as the connector label.
33        Otherwise, derive the label from the username and host in the URI.
34
35    as_dict: bool, default False
36        If `True`, return a dictionary of the keyword arguments
37        necessary to create a new `APIConnector`, otherwise create a new object.
38
39    Returns
40    -------
41    A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
42    """
43    from meerschaum.connectors.sql import SQLConnector
44    params = SQLConnector.parse_uri(uri)
45    if 'host' not in params:
46        error("No host was found in the provided URI.")
47    params['protocol'] = params.pop('flavor')
48    params['label'] = label or (
49        (
50            (params['username'] + '@' if 'username' in params else '')
51            + params['host']
52        ).lower()
53    )
54
55    return cls(**params) if not as_dict else params

Create a new APIConnector from a URI string.

Parameters
  • uri (str): The URI connection string.
  • label (Optional[str], default None): If provided, use this as the connector label. Otherwise, derive the label from the username and host in the URI.
  • as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new APIConnector, otherwise create a new object.
Returns
  • A new APIConnector object or a dictionary of attributes (if as_dict is True).
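
A hedged sketch (the URI is hypothetical); with `as_dict=True`, the parsed attributes are returned instead of a new object:

>>> from meerschaum.connectors import APIConnector
>>> conn = APIConnector.from_uri('http://user:pass@localhost:8000', label='remote')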
def get_jobs(self, debug: bool = False) -> Dict[str, meerschaum.Job]:
28def get_jobs(self, debug: bool = False) -> Dict[str, Job]:
29    """
30    Return a dictionary of remote jobs.
31    """
32    response = self.get(JOBS_ENDPOINT, debug=debug)
33    if not response:
34        warn(f"Failed to get remote jobs from {self}.")
35        return {}
36    return {
37        name: Job(
38            name,
39            job_meta['sysargs'],
40            executor_keys=str(self),
41            _properties=job_meta['daemon']['properties']
42        )
43        for name, job_meta in response.json().items()
44    }

Return a dictionary of remote jobs.
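
A hedged sketch (the job name is illustrative):

>>> jobs = conn.get_jobs()
>>> list(jobs)
['sync-weather']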

def get_job(self, name: str, debug: bool = False) -> meerschaum.Job:
47def get_job(self, name: str, debug: bool = False) -> Job:
48    """
49    Return a single Job object.
50    """
51    metadata = self.get_job_metadata(name, debug=debug)
52    if not metadata:
53        raise ValueError(f"Job '{name}' does not exist.")
54
55    return Job(
56        name,
57        metadata['sysargs'],
58        executor_keys=str(self),
59        _properties=metadata['daemon']['properties'],
60    )

Return a single Job object.

def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
 63def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
 64    """
 65    Return the metadata for a single job.
 66    """
 67    now = time.perf_counter()
 68    _job_metadata_cache = self.__dict__.get('_job_metadata_cache', None)
 69    _job_metadata_timestamp = (
 70        _job_metadata_cache.get(name, {}).get('timestamp', None)
 71    ) if _job_metadata_cache is not None else None
 72
 73    if (
 74        _job_metadata_timestamp is not None
 75        and (now - _job_metadata_timestamp) < JOB_METADATA_CACHE_SECONDS
 76    ):
 77        if debug:
 78            dprint(f"Returning cached metadata for job '{name}'.")
 79        return _job_metadata_cache[name]['metadata']
 80
 81    response = self.get(JOBS_ENDPOINT + f"/{name}", debug=debug)
 82    if not response:
 83        if debug:
 84            msg = (
 85                response.json()['detail']
 86                if 'detail' in response.text
 87                else response.text
 88            )
 89            warn(f"Failed to get metadata for job '{name}':\n{msg}")
 90        return {}
 91
 92    metadata = response.json()
 93    if _job_metadata_cache is None:
 94        self._job_metadata_cache = {}
 95
 96    self._job_metadata_cache[name] = {
 97        'timestamp': now,
 98        'metadata': metadata,
 99    }
100    return metadata

Return the metadata for a single job.

def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
102def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
103    """
104    Return the daemon properties for a single job.
105    """
106    metadata = self.get_job_metadata(name, debug=debug)
107    return metadata.get('daemon', {}).get('properties', {})

Return the daemon properties for a single job.

def get_job_exists(self, name: str, debug: bool = False) -> bool:
149def get_job_exists(self, name: str, debug: bool = False) -> bool:
150    """
151    Return whether a job exists.
152    """
153    response = self.get(JOBS_ENDPOINT + f'/{name}/exists', debug=debug)
154    if not response:
155        warn(f"Failed to determine whether job '{name}' exists.")
156        return False
157
158    return response.json()

Return whether a job exists.

def delete_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
161def delete_job(self, name: str, debug: bool = False) -> SuccessTuple:
162    """
163    Delete a job.
164    """
165    response = self.delete(JOBS_ENDPOINT + f"/{name}", debug=debug)
166    if not response:
167        if 'detail' in response.text:
168            return False, response.json()['detail']
169
170        return False, response.text
171
172    return tuple(response.json())

Delete a job.

def start_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
175def start_job(self, name: str, debug: bool = False) -> SuccessTuple:
176    """
177    Start a job.
178    """
179    response = self.post(JOBS_ENDPOINT + f"/{name}/start", debug=debug)
180    if not response:
181        if 'detail' in response.text:
182            return False, response.json()['detail']
183        return False, response.text
184
185    return tuple(response.json())

Start a job.

def create_job( self, name: str, sysargs: List[str], properties: Optional[Dict[str, str]] = None, debug: bool = False) -> Tuple[bool, str]:
188def create_job(
189    self,
190    name: str,
191    sysargs: List[str],
192    properties: Optional[Dict[str, str]] = None,
193    debug: bool = False,
194) -> SuccessTuple:
195    """
196    Create a job.
197    """
198    response = self.post(
199        JOBS_ENDPOINT + f"/{name}",
200        json={
201            'sysargs': sysargs,
202            'properties': properties,
203        },
204        debug=debug,
205    )
206    if not response:
207        if 'detail' in response.text:
208            return False, response.json()['detail']
209        return False, response.text
210
211    return tuple(response.json())

Create a job.

def stop_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
214def stop_job(self, name: str, debug: bool = False) -> SuccessTuple:
215    """
216    Stop a job.
217    """
218    response = self.post(JOBS_ENDPOINT + f"/{name}/stop", debug=debug)
219    if not response:
220        if 'detail' in response.text:
221            return False, response.json()['detail']
222        return False, response.text
223
224    return tuple(response.json())

Stop a job.

def pause_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
227def pause_job(self, name: str, debug: bool = False) -> SuccessTuple:
228    """
229    Pause a job.
230    """
231    response = self.post(JOBS_ENDPOINT + f"/{name}/pause", debug=debug)
232    if not response:
233        if 'detail' in response.text:
234            return False, response.json()['detail']
235        return False, response.text
236
237    return tuple(response.json())

Pause a job.

def get_logs(self, name: str, debug: bool = False) -> str:
240def get_logs(self, name: str, debug: bool = False) -> str:
241    """
242    Return the logs for a job.
243    """
244    response = self.get(LOGS_ENDPOINT + f"/{name}")
245    if not response:
246        raise ValueError(f"Cannot fetch logs for job '{name}':\n{response.text}")
247
248    return response.json()

Return the logs for a job.

def get_job_stop_time(self, name: str, debug: bool = False) -> Optional[datetime.datetime]:
251def get_job_stop_time(self, name: str, debug: bool = False) -> Union[datetime, None]:
252    """
253    Return the job's manual stop time.
254    """
255    response = self.get(JOBS_ENDPOINT + f"/{name}/stop_time")
256    if not response:
257        warn(f"Failed to get stop time for job '{name}':\n{response.text}")
258        return None
259
260    data = response.json()
261    if data is None:
262        return None
263
264    return datetime.fromisoformat(data)

Return the job's manual stop time.

def monitor_logs( self, name: str, callback_function: Callable[[Any], Any], input_callback_function: Callable[[NoneType], str], stop_callback_function: Callable[[NoneType], str], stop_on_exit: bool = False, strip_timestamps: bool = False, accept_input: bool = True, debug: bool = False):
348def monitor_logs(
349    self,
350    name: str,
351    callback_function: Callable[[Any], Any],
352    input_callback_function: Callable[[None], str],
353    stop_callback_function: Callable[[None], str],
354    stop_on_exit: bool = False,
355    strip_timestamps: bool = False,
356    accept_input: bool = True,
357    debug: bool = False,
358):
359    """
360    Monitor a job's log files and execute a callback with the changes.
361    """
362    return asyncio.run(
363        self.monitor_logs_async(
364            name,
365            callback_function,
366            input_callback_function=input_callback_function,
367            stop_callback_function=stop_callback_function,
368            stop_on_exit=stop_on_exit,
369            strip_timestamps=strip_timestamps,
370            accept_input=accept_input,
371            debug=debug
372        )
373    )

Monitor a job's log files and execute a callback with the changes.
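
A hedged sketch which prints new log lines as they arrive (assumes a remote job named 'sync-weather'):

>>> conn.monitor_logs(
...     'sync-weather',
...     print,
...     input_callback_function=input,
...     stop_callback_function=print,
...     stop_on_exit=True,
... )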

async def monitor_logs_async( self, name: str, callback_function: Callable[[Any], Any], input_callback_function: Callable[[], str], stop_callback_function: Callable[[Tuple[bool, str]], str], stop_on_exit: bool = False, strip_timestamps: bool = False, accept_input: bool = True, debug: bool = False):
267async def monitor_logs_async(
268    self,
269    name: str,
270    callback_function: Callable[[Any], Any],
271    input_callback_function: Callable[[], str],
272    stop_callback_function: Callable[[SuccessTuple], str],
273    stop_on_exit: bool = False,
274    strip_timestamps: bool = False,
275    accept_input: bool = True,
276    debug: bool = False,
277):
278    """
279    Monitor a job's log files and await a callback with the changes.
280    """
281    import traceback
282    from meerschaum.jobs import StopMonitoringLogs
283    from meerschaum.utils.formatting._jobs import strip_timestamp_from_line
284
285    websockets, websockets_exceptions = mrsm.attempt_import('websockets', 'websockets.exceptions')
286    protocol = 'ws' if self.URI.startswith('http://') else 'wss'
287    port = self.port if 'port' in self.__dict__ else ''
288    uri = f"{protocol}://{self.host}:{port}{LOGS_ENDPOINT}/{name}/ws"
289
290    async def _stdin_callback(client):
291        if input_callback_function is None:
292            return
293
294        if asyncio.iscoroutinefunction(input_callback_function):
295            data = await input_callback_function()
296        else:
297            data = input_callback_function()
298
299        await client.send(data)
300
301    async def _stop_callback(client):
302        try:
303            result = tuple(json.loads(await client.recv()))
304        except Exception as e:
305            warn(traceback.format_exc())
306            result = False, str(e)
307
308        if stop_callback_function is not None:
309            if asyncio.iscoroutinefunction(stop_callback_function):
310                await stop_callback_function(result)
311            else:
312                stop_callback_function(result)
313
314        if stop_on_exit:
315            raise StopMonitoringLogs
316
317    message_callbacks = {
318        JOBS_STDIN_MESSAGE: _stdin_callback,
319        JOBS_STOP_MESSAGE: _stop_callback,
320    }
321
322    async with websockets.connect(uri) as websocket:
323        try:
324            await websocket.send(self.token or 'no-login')
325        except websockets_exceptions.ConnectionClosedOK:
326            pass
327
328        while True:
329            try:
330                response = await websocket.recv()
331                callback = message_callbacks.get(response, None)
332                if callback is not None:
333                    await callback(websocket)
334                    continue
335
336                if strip_timestamps:
337                    response = strip_timestamp_from_line(response)
338
339                if asyncio.iscoroutinefunction(callback_function):
340                    await callback_function(response)
341                else:
342                    callback_function(response)
343            except (KeyboardInterrupt, StopMonitoringLogs):
344                await websocket.close()
345                break

Monitor a job's log files and await a callback with the changes.

def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
375def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
376    """
377    Return whether a remote job is blocking on stdin.
378    """
379    response = self.get(JOBS_ENDPOINT + f'/{name}/is_blocking_on_stdin', debug=debug)
380    if not response:
381        return False
382
383    return response.json()

Return whether a remote job is blocking on stdin.

def get_job_began(self, name: str, debug: bool = False) -> Optional[str]:
116def get_job_began(self, name: str, debug: bool = False) -> Union[str, None]:
117    """
118    Return a job's `began` timestamp, if it exists.
119    """
120    properties = self.get_job_properties(name, debug=debug)
121    began_str = properties.get('daemon', {}).get('began', None)
122    if began_str is None:
123        return None
124
125    return began_str

Return a job's began timestamp, if it exists.

def get_job_ended(self, name: str, debug: bool = False) -> Optional[str]:
127def get_job_ended(self, name: str, debug: bool = False) -> Union[str, None]:
128    """
129    Return a job's `ended` timestamp, if it exists.
130    """
131    properties = self.get_job_properties(name, debug=debug)
132    ended_str = properties.get('daemon', {}).get('ended', None)
133    if ended_str is None:
134        return None
135
136    return ended_str

Return a job's ended timestamp, if it exists.

def get_job_paused(self, name: str, debug: bool = False) -> Optional[str]:
138def get_job_paused(self, name: str, debug: bool = False) -> Union[str, None]:
139    """
140    Return a job's `paused` timestamp, if it exists.
141    """
142    properties = self.get_job_properties(name, debug=debug)
143    paused_str = properties.get('daemon', {}).get('paused', None)
144    if paused_str is None:
145        return None
146
147    return paused_str

Return a job's paused timestamp, if it exists.

def get_job_status(self, name: str, debug: bool = False) -> str:
109def get_job_status(self, name: str, debug: bool = False) -> str:
110    """
111    Return the job's status.
112    """
113    metadata = self.get_job_metadata(name, debug=debug)
114    return metadata.get('status', 'stopped')

Return the job's status.
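
A hedged sketch (the job name and output are illustrative):

>>> conn.get_job_status('sync-weather')
'running'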

def get_connector( type: str = None, label: str = None, refresh: bool = False, debug: bool = False, **kw: Any) -> Connector:
 80def get_connector(
 81    type: str = None,
 82    label: str = None,
 83    refresh: bool = False,
 84    debug: bool = False,
 85    **kw: Any
 86) -> Connector:
 87    """
 88    Return existing connector or create new connection and store for reuse.
 89    
 90    You can create new connectors if enough parameters are provided for the given type and flavor.
 91    
 92
 93    Parameters
 94    ----------
 95    type: Optional[str], default None
 96        Connector type (sql, api, etc.).
 97        Defaults to the type of the configured `instance_connector`.
 98
 99    label: Optional[str], default None
100        Connector label (e.g. main). Defaults to `'main'`.
101
102    refresh: bool, default False
103        Refresh the Connector instance / construct new object. Defaults to `False`.
104
105    kw: Any
106        Other arguments to pass to the Connector constructor.
107        If the Connector has already been constructed and new arguments are provided,
108        `refresh` is set to `True` and the old Connector is replaced.
109
110    Returns
111    -------
112    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
113    `meerschaum.connectors.sql.SQLConnector`).
114    
115    Examples
116    --------
117    The following parameters would create a new
118    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.
119
120    ```
121    >>> conn = get_connector(
122    ...     type = 'sql',
123    ...     label = 'newlabel',
124    ...     flavor = 'sqlite',
125    ...     database = '/file/path/to/database.db'
126    ... )
127    >>>
128    ```
129
130    """
131    from meerschaum.connectors.parse import parse_instance_keys
132    from meerschaum.config import get_config
133    from meerschaum.config.static import STATIC_CONFIG
134    from meerschaum.utils.warnings import warn
135    global _loaded_plugin_connectors
136    if isinstance(type, str) and not label and ':' in type:
137        type, label = type.split(':', maxsplit=1)
138
139    with _locks['_loaded_plugin_connectors']:
140        if not _loaded_plugin_connectors:
141            load_plugin_connectors()
142            _load_builtin_custom_connectors()
143            _loaded_plugin_connectors = True
144
145    if type is None and label is None:
146        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
147        ### recursive call to get_connector
148        return parse_instance_keys(default_instance_keys)
149
150    ### NOTE: the default instance connector may not be main.
151    ### Only fall back to 'main' if the type is provided but the label is omitted.
152    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']
153
154    ### type might actually be a label. Check if so and raise a warning.
155    if type not in connectors:
156        possibilities, poss_msg = [], ""
157        for _type in get_config('meerschaum', 'connectors'):
158            if type in get_config('meerschaum', 'connectors', _type):
159                possibilities.append(f"{_type}:{type}")
160        if len(possibilities) > 0:
161            poss_msg = " Did you mean"
162            for poss in possibilities[:-1]:
163                poss_msg += f" '{poss}',"
164            if poss_msg.endswith(','):
165                poss_msg = poss_msg[:-1]
166            if len(possibilities) > 1:
167                poss_msg += " or"
168            poss_msg += f" '{possibilities[-1]}'?"
169
170        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
171        return None
172
173    if 'sql' not in types:
174        from meerschaum.connectors.plugin import PluginConnector
175        from meerschaum.connectors.valkey import ValkeyConnector
176        with _locks['types']:
177            types.update({
178                'api': APIConnector,
179                'sql': SQLConnector,
180                'plugin': PluginConnector,
181                'valkey': ValkeyConnector,
182            })
183
184    ### determine if we need to call the constructor
185    if not refresh:
186        ### see if any user-supplied arguments differ from the existing instance
187        if label in connectors[type]:
188            warning_message = None
189            for attribute, value in kw.items():
190                if attribute not in connectors[type][label].meta:
191                    import inspect
192                    cls = connectors[type][label].__class__
193                    cls_init_signature = inspect.signature(cls)
194                    cls_init_params = cls_init_signature.parameters
195                    if attribute not in cls_init_params:
196                        warning_message = (
197                            f"Received new attribute '{attribute}' not present in connector " +
198                            f"{connectors[type][label]}.\n"
199                        )
200                elif connectors[type][label].__dict__[attribute] != value:
201                    warning_message = (
202                        f"Mismatched values for attribute '{attribute}' in connector "
203                        + f"'{connectors[type][label]}'.\n" +
204                        f"  - Keyword value: '{value}'\n" +
205                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
206                    )
207            if warning_message is not None:
208                warning_message += (
209                    "\nSetting `refresh` to True and recreating connector with type:"
210                    + f" '{type}' and label '{label}'."
211                )
212                refresh = True
213                warn(warning_message)
214        else: ### connector doesn't yet exist
215            refresh = True
216
217    ### only create an object if refresh is True
218    ### (can be manually specified, otherwise determined above)
219    if refresh:
220        with _locks['connectors']:
221            try:
222                ### will raise an error if configuration is incorrect / missing
223                conn = types[type](label=label, **kw)
224                connectors[type][label] = conn
225            except InvalidAttributesError as ie:
226                warn(
227                    f"Incorrect attributes for connector '{type}:{label}'.\n"
228                    + str(ie),
229                    stack = False,
230                )
231                conn = None
232            except Exception as e:
233                from meerschaum.utils.formatting import get_console
234                console = get_console()
235                if console:
236                    console.print_exception()
237                warn(
238                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
239                    stack = False,
240                )
241                conn = None
242        if conn is None:
243            return None
244
245    return connectors[type][label]

Return existing connector or create new connection and store for reuse.

You can create new connectors if enough parameters are provided for the given type and flavor.

Parameters
  • type (Optional[str], default None): Connector type (sql, api, etc.). Defaults to the type of the configured instance_connector.
  • label (Optional[str], default None): Connector label (e.g. main). Defaults to 'main'.
  • refresh (bool, default False): Refresh the Connector instance / construct new object. Defaults to False.
  • kw (Any): Other arguments to pass to the Connector constructor. If the Connector has already been constructed and new arguments are provided, refresh is set to True and the old Connector is replaced.
Returns
Examples

The following parameters would create a new meerschaum.connectors.sql.SQLConnector that isn't in the configuration file.

>>> conn = get_connector(
...     type = 'sql',
...     label = 'newlabel',
...     flavor = 'sqlite',
...     database = '/file/path/to/database.db'
... )
>>>
def is_connected(keys: str, **kw) -> bool:
248def is_connected(keys: str, **kw) -> bool:
249    """
250    Check if the connector keys correspond to an active connection.
251    If the connector has not been created, it will immediately return `False`.
252    If the connector exists but cannot communicate with the source, return `False`.
253    
254    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
255    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
256
257    Parameters
258    ----------
259    keys:
260        The keys to the connector (e.g. `'sql:main'`).
261        
262    Returns
263    -------
264    A `bool` corresponding to whether a successful connection may be made.
265
266    """
267    import warnings
268    if ':' not in keys:
269        warn(f"Invalid connector keys '{keys}'")
270
271    try:
272        typ, label = keys.split(':')
273    except Exception:
274        return False
275    if typ not in instance_types:
276        return False
277    if label not in connectors.get(typ, {}):
278        return False
279
280    from meerschaum.connectors.parse import parse_instance_keys
281    conn = parse_instance_keys(keys)
282    try:
283        with warnings.catch_warnings():
284            warnings.filterwarnings('ignore')
285            return conn.test_connection(**kw)
286    except Exception:
287        return False

Check if the connector keys correspond to an active connection. If the connector has not been created, it will immediately return False. If the connector exists but cannot communicate with the source, return False.

NOTE: Only works with instance connectors (SQLConnectors and APIConnectors). Keyword arguments are passed to meerschaum.connectors.poll.retry_connect.

Parameters
  • keys:: The keys to the connector (e.g. 'sql:main').
Returns
  • A bool corresponding to whether a successful connection may be made.
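
A hedged sketch, assuming 'sql:main' was already created during this session:

>>> from meerschaum.connectors import get_connector, is_connected
>>> conn = get_connector('sql:main')
>>> is_connected('sql:main')
True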