meerschaum.utils.dtypes
Utility functions for working with data types.
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

"""
Utility functions for working with data types.
"""

import traceback
import json
import uuid
from datetime import timezone, datetime
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP

import meerschaum as mrsm
from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
from meerschaum.utils.warnings import warn

### Aliases which resolve to a canonical Meerschaum dtype.
MRSM_ALIAS_DTYPES: Dict[str, str] = {
    'decimal': 'numeric',
    'Decimal': 'numeric',
    'number': 'numeric',
    'jsonl': 'json',
    'JSON': 'json',
    'binary': 'bytes',
    'blob': 'bytes',
    'varbinary': 'bytes',
    'bytea': 'bytes',
    'guid': 'uuid',
    'UUID': 'uuid',
    'geom': 'geometry',
    'geog': 'geography',
}
### Canonical Meerschaum dtypes and the Pandas dtypes used to store them.
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
    'json': 'object',
    'numeric': 'object',
    'geometry': 'object',
    'geography': 'object',
    'uuid': 'object',
    'datetime': 'datetime64[ns, UTC]',
    'bool': 'bool[pyarrow]',
    'int': 'Int64',
    'int8': 'Int8',
    'int16': 'Int16',
    'int32': 'Int32',
    'int64': 'Int64',
    'str': 'string[python]',
    'bytes': 'object',
    None: 'object',
}


def to_pandas_dtype(dtype: str) -> str:
    """
    Cast a supported Meerschaum dtype to a Pandas dtype.

    Parameters
    ----------
    dtype: str
        The Meerschaum dtype, alias, SQL database type, or Pandas dtype string.

    Returns
    -------
    The corresponding Pandas dtype string (`'object'` if `dtype` is invalid).
    """
    known_dtype = MRSM_PD_DTYPES.get(dtype, None)
    if known_dtype is not None:
        return known_dtype

    alias_dtype = MRSM_ALIAS_DTYPES.get(dtype, None)
    if alias_dtype is not None:
        return MRSM_PD_DTYPES[alias_dtype]

    ### Parametrized dtypes (e.g. `numeric[10,2]`) map to their bare dtype.
    if dtype.startswith('numeric'):
        return MRSM_PD_DTYPES['numeric']

    if dtype.startswith('geometry'):
        return MRSM_PD_DTYPES['geometry']

    if dtype.startswith('geography'):
        return MRSM_PD_DTYPES['geography']

    ### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
    ### treat it as a SQL db type.
    if dtype.split(' ')[0].isupper():
        from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
        return get_pd_type_from_db_type(dtype)

    from meerschaum.utils.packages import attempt_import
    _ = attempt_import('pyarrow', lazy=False)
    pandas = attempt_import('pandas', lazy=False)

    try:
        return str(pandas.api.types.pandas_dtype(dtype))
    except Exception:
        warn(
            f"Invalid dtype '{dtype}', will use 'object' instead:\n"
            + f"{traceback.format_exc()}",
            stack=False,
        )
    return 'object'


def are_dtypes_equal(
    ldtype: Union[str, Dict[str, str]],
    rdtype: Union[str, Dict[str, str]],
) -> bool:
    """
    Determine whether two dtype strings may be considered
    equivalent to avoid unnecessary conversions.

    Parameters
    ----------
    ldtype: Union[str, Dict[str, str]]
        The left dtype to compare.
        May also provide a dtypes dictionary.

    rdtype: Union[str, Dict[str, str]]
        The right dtype to compare.
        May also provide a dtypes dictionary.

    Returns
    -------
    A `bool` indicating whether the two dtypes are to be considered equivalent.
    Two dtypes dictionaries are equivalent only if they share the same
    (stringified) keys and every pair of values is equivalent.
    """
    if isinstance(ldtype, dict) and isinstance(rdtype, dict):
        ### NOTE: Keys are stringified so that mixed-type column names may be compared.
        ltypes = {str(key): val for key, val in ldtype.items()}
        rtypes = {str(key): val for key, val in rdtype.items()}
        if ltypes.keys() != rtypes.keys():
            return False
        return all(
            are_dtypes_equal(ltype, rtypes[key])
            for key, ltype in ltypes.items()
        )

    try:
        if ldtype == rdtype:
            return True
    except Exception:
        warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
        return False

    ### Sometimes pandas dtype objects are passed.
    ldtype = str(ldtype).split('[', maxsplit=1)[0]
    rdtype = str(rdtype).split('[', maxsplit=1)[0]

    ldtype = MRSM_ALIAS_DTYPES.get(ldtype, ldtype)
    rdtype = MRSM_ALIAS_DTYPES.get(rdtype, rdtype)

    ### Dtypes stored as `object` are interchangeable with `object` itself.
    json_dtypes = ('json', 'object')
    if ldtype in json_dtypes and rdtype in json_dtypes:
        return True

    numeric_dtypes = ('numeric', 'object')
    if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
        return True

    uuid_dtypes = ('uuid', 'object')
    if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
        return True

    bytes_dtypes = ('bytes', 'object')
    if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
        return True

    geometry_dtypes = ('geometry', 'object', 'geography')
    if ldtype in geometry_dtypes and rdtype in geometry_dtypes:
        return True

    ldtype_lower = ldtype.lower()
    rdtype_lower = rdtype.lower()
    if ldtype_lower == rdtype_lower:
        return True

    datetime_dtypes = ('datetime', 'timestamp')
    ldtype_found_dt_prefix = any(dt_prefix in ldtype_lower for dt_prefix in datetime_dtypes)
    rdtype_found_dt_prefix = any(dt_prefix in rdtype_lower for dt_prefix in datetime_dtypes)
    if ldtype_found_dt_prefix and rdtype_found_dt_prefix:
        return True

    string_dtypes = ('str', 'string', 'object')
    if ldtype in string_dtypes and rdtype in string_dtypes:
        return True

    int_dtypes = ('int', 'int64', 'int32', 'int16', 'int8')
    if ldtype_lower in int_dtypes and rdtype_lower in int_dtypes:
        return True

    float_dtypes = ('float', 'float64', 'float32', 'float16', 'float128', 'double')
    if ldtype_lower in float_dtypes and rdtype_lower in float_dtypes:
        return True

    bool_dtypes = ('bool', 'boolean')
    if ldtype in bool_dtypes and rdtype in bool_dtypes:
        return True

    return False


def is_dtype_numeric(dtype: str) -> bool:
    """
    Determine whether a given `dtype` string
    should be considered compatible with the Meerschaum dtype `numeric`.

    Parameters
    ----------
    dtype: str
        The pandas-like dtype string.

    Returns
    -------
    A bool indicating the dtype is compatible with `numeric`.
    """
    dtype_lower = dtype.lower()

    acceptable_substrings = ('numeric', 'float', 'double', 'int')
    for substring in acceptable_substrings:
        if substring in dtype_lower:
            return True

    return False


def attempt_cast_to_numeric(
    value: Any,
    quantize: bool = False,
    precision: Optional[int] = None,
    scale: Optional[int] = None,
)-> Any:
    """
    Given a value, attempt to coerce it into a numeric (Decimal).

    Parameters
    ----------
    value: Any
        The value to be cast to a Decimal.

    quantize: bool, default False
        If `True`, quantize the decimal to the specified precision and scale.

    precision: Optional[int], default None
        If `quantize` is `True`, use this precision.

    scale: Optional[int], default None
        If `quantize` is `True`, use this scale.

    Returns
    -------
    A `Decimal` if possible, or `value`.
    Null-like values are returned as `Decimal('NaN')`.
    """
    if isinstance(value, Decimal):
        if quantize and precision and scale:
            return quantize_decimal(value, precision, scale)
        return value
    try:
        if value_is_null(value):
            return Decimal('NaN')

        dec = Decimal(str(value))
        if not quantize or not precision or not scale:
            return dec
        return quantize_decimal(dec, precision, scale)
    except Exception:
        return value


def attempt_cast_to_uuid(value: Any) -> Any:
    """
    Given a value, attempt to coerce it into a UUID (`uuid4`).
    Null-like values become `None`; unparsable values are returned unchanged.
    """
    if isinstance(value, uuid.UUID):
        return value
    try:
        return (
            uuid.UUID(str(value))
            if not value_is_null(value)
            else None
        )
    except Exception:
        return value


def attempt_cast_to_bytes(value: Any) -> Any:
    """
    Given a value, attempt to coerce it into a bytestring.
    Null-like values become `None`; undecodable values are returned unchanged.
    """
    if isinstance(value, bytes):
        return value
    try:
        return (
            deserialize_bytes_string(str(value))
            if not value_is_null(value)
            else None
        )
    except Exception:
        return value


def attempt_cast_to_geometry(value: Any) -> Any:
    """
    Given a value, attempt to coerce it into a `shapely` (`geometry`) object.
    Dictionaries and lists are parsed as GeoJSON; strings and bytes as WKT or WKB.
    Values which cannot be parsed are returned unchanged.
    """
    shapely, shapely_wkt, shapely_wkb = mrsm.attempt_import(
        'shapely',
        'shapely.wkt',
        'shapely.wkb',
        lazy=False,
    )
    if 'shapely' in str(type(value)):
        return value

    if isinstance(value, (dict, list)):
        try:
            return shapely.from_geojson(json.dumps(value))
        except Exception:
            return value

    value_is_wkt = geometry_is_wkt(value)
    if value_is_wkt is None:
        return value

    try:
        return (
            shapely_wkt.loads(value)
            if value_is_wkt
            else shapely_wkb.loads(value)
        )
    except Exception:
        return value


def geometry_is_wkt(value: Union[str, bytes]) -> Union[bool, None]:
    """
    Determine whether an input value should be treated as WKT or WKB geometry data.

    Parameters
    ----------
    value: Union[str, bytes]
        The input data to be parsed into geometry data.

    Returns
    -------
    A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
    Return `None` if `value` should be parsed as neither.
    """
    import re
    if isinstance(value, bytes):
        return False

    if not isinstance(value, str):
        return None

    wkt_pattern = r'^\s*(POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)\s*\(.*\)\s*$'
    if re.match(wkt_pattern, value, re.IGNORECASE):
        return True

    ### NOTE: An empty string would trivially pass the hex check below but is not WKB.
    if not value:
        return None

    if all(c in '0123456789ABCDEFabcdef' for c in value) and len(value) % 2 == 0:
        return False

    return None


def value_is_null(value: Any) -> bool:
    """
    Determine if a value is a null-like string.
    """
    return str(value).lower() in ('none', 'nan', 'na', 'nat', '', '<na>')


def none_if_null(value: Any) -> Any:
    """
    Return `None` if a value is a null-like string.
    """
    return (None if value_is_null(value) else value)


def quantize_decimal(x: Decimal, precision: int, scale: int) -> Decimal:
    """
    Quantize a given `Decimal` to a known scale and precision.

    Parameters
    ----------
    x: Decimal
        The `Decimal` to be quantized.

    precision: int
        The total number of significant digits.

    scale: int
        The number of significant digits after the decimal point.

    Returns
    -------
    A `Decimal` quantized to the specified scale and precision.

    Raises
    ------
    A `ValueError` if `x` cannot be quantized to the given precision and scale.
    """
    ### Only the exponent of this template is used by `quantize()`.
    precision_decimal = Decimal(('1' * (precision - scale)) + '.' + ('1' * scale))
    try:
        return x.quantize(precision_decimal, context=Context(prec=precision), rounding=ROUND_HALF_UP)
    except InvalidOperation:
        pass

    raise ValueError(f"Cannot quantize value '{x}' to {precision=}, {scale=}.")


def serialize_decimal(
    x: Any,
    quantize: bool = False,
    precision: Optional[int] = None,
    scale: Optional[int] = None,
) -> Any:
    """
    Return a quantized string of an input decimal.

    Parameters
    ----------
    x: Any
        The potential decimal to be serialized.

    quantize: bool, default False
        If `True`, quantize the incoming Decimal to the specified scale and precision
        before serialization.

    precision: Optional[int], default None
        The precision of the decimal to be quantized.

    scale: Optional[int], default None
        The scale of the decimal to be quantized.

    Returns
    -------
    A string of the input decimal or the input if not a Decimal.
    Null-like decimals (e.g. `Decimal('NaN')`) are returned as `None`.
    """
    if not isinstance(x, Decimal):
        return x

    if value_is_null(x):
        return None

    if quantize and scale and precision:
        x = quantize_decimal(x, precision, scale)

    return f"{x:f}"


def coerce_timezone(
    dt: Any,
    strip_utc: bool = False,
) -> Any:
    """
    Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
    return a UTC timestamp (strip timezone if `strip_utc` is `True`).
    Naive datetimes are treated as already being in UTC.
    """
    if dt is None:
        return None

    if isinstance(dt, int):
        return dt

    if isinstance(dt, str):
        dateutil_parser = mrsm.attempt_import('dateutil.parser')
        dt = dateutil_parser.parse(dt)

    dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')

    if dt_is_series:
        pandas = mrsm.attempt_import('pandas', lazy=False)

        ### Return early if the series already has the requested timezone-awareness.
        if (
            pandas.api.types.is_datetime64_any_dtype(dt) and (
                (dt.dt.tz is not None and not strip_utc)
                or
                (dt.dt.tz is None and strip_utc)
            )
        ):
            return dt

        dt_series = to_datetime(dt, coerce_utc=False)
        if strip_utc:
            try:
                if dt_series.dt.tz is not None:
                    dt_series = dt_series.dt.tz_localize(None)
            except Exception:
                pass

        return dt_series

    if dt.tzinfo is None:
        if strip_utc:
            return dt
        return dt.replace(tzinfo=timezone.utc)

    utc_dt = dt.astimezone(timezone.utc)
    if strip_utc:
        return utc_dt.replace(tzinfo=None)
    return utc_dt


def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
    """
    Wrap `pd.to_datetime()` and add support for out-of-bounds values.

    Parameters
    ----------
    dt_val: Any
        The value (scalar or series) to be parsed.

    as_pydatetime: bool, default False
        If `True`, call `.to_pydatetime()` on the value parsed by `pd.to_datetime()`.

    coerce_utc: bool, default True
        If `True`, pass values parsed by the `dateutil` fallback through `coerce_timezone()`.

    Returns
    -------
    The parsed datetime value(s).
    Values which cannot be parsed by the fallback are returned as-is.
    """
    pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
    is_dask = 'dask' in (getattr(dt_val, '__module__', '') or '')
    dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
    dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
    pd = pandas if dd is None else dd

    try:
        new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
        if as_pydatetime:
            return new_dt_val.to_pydatetime()
        return new_dt_val
    ### NOTE: Reference the exception class from `pandas` itself
    ### because `pd` may be `dask.dataframe` here.
    except (pandas.errors.OutOfBoundsDatetime, ValueError):
        pass

    def parse(x: Any) -> Any:
        try:
            return dateutil_parser.parse(x)
        except Exception:
            return x

    if dt_is_series:
        new_series = dt_val.apply(parse)
        if coerce_utc:
            return coerce_timezone(new_series)
        return new_series

    new_dt_val = parse(dt_val)
    if not coerce_utc:
        return new_dt_val
    return coerce_timezone(new_dt_val)


def serialize_bytes(data: bytes) -> str:
    """
    Return the given bytes as a base64-encoded string.
    Null-like values which are not bytes are returned unchanged.
    """
    import base64
    if not isinstance(data, bytes) and value_is_null(data):
        return data
    return base64.b64encode(data).decode('utf-8')


def serialize_geometry(
    geom: Any,
    geometry_format: str = 'wkb_hex',
    as_wkt: bool = False,
) -> Union[str, Dict[str, Any], None]:
    """
    Serialize geometry data as a hex-encoded well-known-binary string.

    Parameters
    ----------
    geom: Any
        The potential geometry data to be serialized.

    geometry_format: str, default 'wkb_hex'
        The serialization format for geometry data.
        Accepted formats are `wkb_hex` (well-known binary hex string),
        `wkt` (well-known text), and `geojson`.

    as_wkt: bool, default False
        Currently unused by this function; use `geometry_format='wkt'` instead.

    Returns
    -------
    A string containing the geometry data (a dictionary for `geojson`),
    or `None` if `geom` is null-like.
    """
    if value_is_null(geom):
        return None
    shapely = mrsm.attempt_import('shapely', lazy=False)
    if geometry_format == 'geojson':
        geojson_str = shapely.to_geojson(geom)
        return json.loads(geojson_str)

    if hasattr(geom, 'wkb_hex'):
        return geom.wkb_hex if geometry_format == 'wkb_hex' else geom.wkt

    return str(geom)


def deserialize_geometry(geom_wkb: Union[str, bytes]):
    """
    Deserialize a WKB string into a shapely geometry object.
    """
    ### NOTE: The module name must be given to `attempt_import()`.
    shapely_wkb = mrsm.attempt_import('shapely.wkb', lazy=False)
    return shapely_wkb.loads(geom_wkb)


def deserialize_bytes_string(data: Optional[str], force_hex: bool = False) -> Union[bytes, None]:
    """
    Given a serialized ASCII string of bytes data, return the original bytes.
    The input data may either be base64- or hex-encoded.

    Parameters
    ----------
    data: str | None
        The string to be deserialized into bytes.
        May be base64- or hex-encoded (prefixed with `'\\x'`).

    force_hex: bool = False
        If `True`, treat the input string as hex-encoded.
        If `data` does not begin with the prefix `'\\x'`, set `force_hex` to `True`.
        This will still strip the leading `'\\x'` prefix if present.

    Returns
    -------
    The original bytes used to produce the encoded string `data`.
    """
    if not isinstance(data, str) and value_is_null(data):
        return data

    import binascii
    import base64

    is_hex = force_hex or data.startswith('\\x')

    if is_hex:
        if data.startswith('\\x'):
            data = data[2:]
        return binascii.unhexlify(data)

    return base64.b64decode(data)


def deserialize_base64(data: str) -> bytes:
    """
    Return the original bytestring from the given base64-encoded string.
    """
    import base64
    return base64.b64decode(data)


def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> Union[str, None]:
    """
    Return the given bytes as a hex string for PostgreSQL's `BYTEA` type.

    Parameters
    ----------
    data: bytes
        The bytes to be encoded.
        Null-like values which are not bytes are returned unchanged.

    with_prefix: bool, default True
        If `True`, prefix the hex string with `'\\x'`.
    """
    import binascii
    if not isinstance(data, bytes) and value_is_null(data):
        return data
    return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')


def serialize_datetime(dt: datetime) -> Union[str, None]:
    """
    Serialize a datetime object into JSON (ISO format string).
    Naive datetimes are suffixed with `'Z'`.
    Return `None` if `dt` is not a `datetime`.

    Examples
    --------
    >>> import json
    >>> from datetime import datetime
    >>> json.dumps({'a': datetime(2022, 1, 1)}, default=serialize_datetime)
    '{"a": "2022-01-01T00:00:00Z"}'

    """
    if not isinstance(dt, datetime):
        return None
    tz_suffix = 'Z' if dt.tzinfo is None else ''
    return dt.isoformat() + tz_suffix


def json_serialize_value(x: Any, default_to_str: bool = True) -> Any:
    """
    Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.

    Parameters
    ----------
    x: Any
        The value to serialize.

    default_to_str: bool, default True
        If `True`, return a string of `x` if x is not a designated type.
        Otherwise return x.

    Returns
    -------
    A serialized version of x, or x.
    """
    if isinstance(x, (mrsm.Pipe, mrsm.connectors.Connector)):
        return x.meta

    if hasattr(x, 'tzinfo'):
        return serialize_datetime(x)

    if isinstance(x, bytes):
        return serialize_bytes(x)

    if isinstance(x, Decimal):
        return serialize_decimal(x)

    if 'shapely' in str(type(x)):
        return serialize_geometry(x)

    if value_is_null(x):
        return None

    return str(x) if default_to_str else x


def get_geometry_type_srid(
    dtype: str = 'geometry',
    default_type: str = 'geometry',
    default_srid: int = 4326,
) -> Union[Tuple[str, int], Tuple[str, None]]:
    """
    Given the specified geometry `dtype`, return a tuple in the form (type, SRID).

    Parameters
    ----------
    dtype: str, default 'geometry'
        Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`).
        You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:

        - `Point`
        - `LineString`
        - `LinearRing`
        - `Polygon`
        - `MultiPoint`
        - `MultiLineString`
        - `MultiPolygon`
        - `GeometryCollection`

    default_type: str, default 'geometry'
        The geometry type to return if none is specified in the modifier.

    default_srid: int, default 4326
        The SRID to return if none is specified in the modifier.

    Returns
    -------
    A tuple in the form (type, SRID).
    Defaults to `(default_type, default_srid)`.
    The geometry type is returned exactly as written in the modifier.

    Examples
    --------
    >>> from meerschaum.utils.dtypes import get_geometry_type_srid
    >>> get_geometry_type_srid()
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[]')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[Point, 0]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0, Point]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0]')
    ('geometry', 0)
    >>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
    ('MULTILINESTRING', 4326)
    >>> get_geometry_type_srid('geography')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geography[POINT]')
    ('POINT', 4326)
    """
    from meerschaum.utils.misc import is_int
    ### NOTE: PostGIS syntax must also be parsed.
    dtype = dtype.replace('(', '[').replace(')', ']')
    bare_dtype = dtype.split('[', maxsplit=1)[0]
    ### NOTE: Slice rather than split on `bare_dtype`, which may be an empty string.
    modifier = dtype[len(bare_dtype):].lstrip('[').rstrip(']')
    if not modifier:
        return default_type, default_srid

    ### Accept `key=value` parts by keeping only the value.
    parts = [
        part.split('=')[-1].strip()
        for part in modifier.split(',')
    ]
    parts_casted = [
        (
            int(part)
            if is_int(part)
            else part
        )
        for part in parts
    ]

    srid = default_srid
    geometry_type = default_type

    ### The first integer part is the SRID, and the first string part is the type.
    for part in parts_casted:
        if isinstance(part, int):
            srid = part
            break

    for part in parts_casted:
        if isinstance(part, str):
            geometry_type = part
            break

    return geometry_type, srid
def to_pandas_dtype(dtype: str) -> str:
    """
    Translate a Meerschaum dtype into the Pandas dtype used to store it.

    Lookup order: canonical Meerschaum dtypes, aliases, parametrized dtypes
    (`numeric`, `geometry`, `geography` prefixes), SQL database types
    (first word in all caps), and finally anything `pandas` itself recognizes.
    Unrecognized dtypes produce a warning and fall back to `'object'`.
    """
    pd_dtype = MRSM_PD_DTYPES.get(dtype)
    if pd_dtype is not None:
        return pd_dtype

    canonical_dtype = MRSM_ALIAS_DTYPES.get(dtype)
    if canonical_dtype is not None:
        return MRSM_PD_DTYPES[canonical_dtype]

    ### Parametrized dtypes (e.g. with a modifier suffix) share their bare dtype's storage.
    for parametrized_dtype in ('numeric', 'geometry', 'geography'):
        if dtype.startswith(parametrized_dtype):
            return MRSM_PD_DTYPES[parametrized_dtype]

    ### NOTE: Kind of a hack: an all-caps first word means a SQL db type.
    first_word = dtype.split(' ')[0]
    if first_word.isupper():
        from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
        return get_pd_type_from_db_type(dtype)

    from meerschaum.utils.packages import attempt_import
    ### `pyarrow` is imported only for its side effects before resolving the dtype.
    attempt_import('pyarrow', lazy=False)
    pandas = attempt_import('pandas', lazy=False)

    try:
        resolved_dtype = pandas.api.types.pandas_dtype(dtype)
        return str(resolved_dtype)
    except Exception:
        warn(
            f"Invalid dtype '{dtype}', will use 'object' instead:\n"
            + f"{traceback.format_exc()}",
            stack=False,
        )

    return 'object'
Cast a supported Meerschaum dtype to a Pandas dtype.
def are_dtypes_equal(
    ldtype: Union[str, Dict[str, str]],
    rdtype: Union[str, Dict[str, str]],
) -> bool:
    """
    Determine whether two dtype strings may be considered
    equivalent to avoid unnecessary conversions.

    Parameters
    ----------
    ldtype: Union[str, Dict[str, str]]
        The left dtype to compare.
        May also provide a dtypes dictionary.

    rdtype: Union[str, Dict[str, str]]
        The right dtype to compare.
        May also provide a dtypes dictionary.

    Returns
    -------
    A `bool` indicating whether the two dtypes are to be considered equivalent.
    Two dtypes dictionaries are equivalent only if they share the same
    (stringified) keys and every pair of values is equivalent.
    """
    if isinstance(ldtype, dict) and isinstance(rdtype, dict):
        ### NOTE: Keys are stringified so that mixed-type column names may be compared.
        ltypes = {str(key): val for key, val in ldtype.items()}
        rtypes = {str(key): val for key, val in rdtype.items()}
        ### Dictionaries with different keys (or key counts) are never equivalent.
        if ltypes.keys() != rtypes.keys():
            return False
        return all(
            are_dtypes_equal(ltype, rtypes[key])
            for key, ltype in ltypes.items()
        )

    try:
        if ldtype == rdtype:
            return True
    except Exception:
        warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
        return False

    ### Sometimes pandas dtype objects are passed.
    ldtype = str(ldtype).split('[', maxsplit=1)[0]
    rdtype = str(rdtype).split('[', maxsplit=1)[0]

    ldtype = MRSM_ALIAS_DTYPES.get(ldtype, ldtype)
    rdtype = MRSM_ALIAS_DTYPES.get(rdtype, rdtype)

    ### Dtypes stored as `object` are interchangeable with `object` itself.
    json_dtypes = ('json', 'object')
    if ldtype in json_dtypes and rdtype in json_dtypes:
        return True

    numeric_dtypes = ('numeric', 'object')
    if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
        return True

    uuid_dtypes = ('uuid', 'object')
    if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
        return True

    bytes_dtypes = ('bytes', 'object')
    if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
        return True

    geometry_dtypes = ('geometry', 'object', 'geography')
    if ldtype in geometry_dtypes and rdtype in geometry_dtypes:
        return True

    ldtype_lower = ldtype.lower()
    rdtype_lower = rdtype.lower()
    if ldtype_lower == rdtype_lower:
        return True

    ### Any two dtypes which mention a datetime-like keyword are equivalent.
    datetime_dtypes = ('datetime', 'timestamp')
    ldtype_found_dt_prefix = any(dt_prefix in ldtype_lower for dt_prefix in datetime_dtypes)
    rdtype_found_dt_prefix = any(dt_prefix in rdtype_lower for dt_prefix in datetime_dtypes)
    if ldtype_found_dt_prefix and rdtype_found_dt_prefix:
        return True

    string_dtypes = ('str', 'string', 'object')
    if ldtype in string_dtypes and rdtype in string_dtypes:
        return True

    int_dtypes = ('int', 'int64', 'int32', 'int16', 'int8')
    if ldtype_lower in int_dtypes and rdtype_lower in int_dtypes:
        return True

    float_dtypes = ('float', 'float64', 'float32', 'float16', 'float128', 'double')
    if ldtype_lower in float_dtypes and rdtype_lower in float_dtypes:
        return True

    bool_dtypes = ('bool', 'boolean')
    if ldtype in bool_dtypes and rdtype in bool_dtypes:
        return True

    return False
Determine whether two dtype strings may be considered equivalent to avoid unnecessary conversions.
Parameters
- ldtype (Union[str, Dict[str, str]]): The left dtype to compare. May also provide a dtypes dictionary.
- rdtype (Union[str, Dict[str, str]]): The right dtype to compare. May also provide a dtypes dictionary.
Returns
- A `bool` indicating whether the two dtypes are to be considered equivalent.
def is_dtype_numeric(dtype: str) -> bool:
    """
    Check whether a `dtype` string is compatible with the Meerschaum dtype `numeric`.

    Parameters
    ----------
    dtype: str
        The pandas-like dtype string.

    Returns
    -------
    `True` if the lowercased `dtype` contains any of the substrings
    `numeric`, `float`, `double`, or `int`, otherwise `False`.
    """
    lowered_dtype = dtype.lower()
    return any(
        fragment in lowered_dtype
        for fragment in ('numeric', 'float', 'double', 'int')
    )
Determine whether a given `dtype` string should be considered compatible with the Meerschaum dtype `numeric`.
Parameters
- dtype (str): The pandas-like dtype string.
Returns
- A bool indicating the dtype is compatible with `numeric`.
def attempt_cast_to_numeric(
    value: Any,
    quantize: bool = False,
    precision: Optional[int] = None,
    scale: Optional[int] = None,
)-> Any:
    """
    Try to convert a value into a `Decimal`.

    Parameters
    ----------
    value: Any
        The value to be cast to a Decimal.

    quantize: bool, default False
        If `True`, quantize the decimal to the specified precision and scale.
        Quantization is only applied when `precision` and `scale` are both truthy.

    precision: Optional[int], default None
        If `quantize` is `True`, use this precision.

    scale: Optional[int], default None
        If `quantize` is `True`, use this scale.

    Returns
    -------
    A `Decimal` if possible (`Decimal('NaN')` for null-like values), or `value`.
    """
    if isinstance(value, Decimal):
        if not (quantize and precision and scale):
            return value
        return quantize_decimal(value, precision, scale)

    ### Any failure while parsing or quantizing returns the input unchanged.
    try:
        if value_is_null(value):
            return Decimal('NaN')

        number = Decimal(str(value))
        if quantize and precision and scale:
            number = quantize_decimal(number, precision, scale)
        return number
    except Exception:
        return value
Given a value, attempt to coerce it into a numeric (Decimal).
Parameters
- value (Any): The value to be cast to a Decimal.
- quantize (bool, default False): If `True`, quantize the decimal to the specified precision and scale.
- precision (Optional[int], default None): If `quantize` is `True`, use this precision.
- scale (Optional[int], default None): If `quantize` is `True`, use this scale.
Returns
- A `Decimal` if possible, or `value`.
def attempt_cast_to_uuid(value: Any) -> Any:
    """
    Try to convert a value into a `uuid.UUID`.

    Existing `UUID` objects are returned as-is, null-like values become `None`,
    and values which cannot be parsed are returned unchanged.
    """
    if isinstance(value, uuid.UUID):
        return value

    try:
        if value_is_null(value):
            return None
        return uuid.UUID(str(value))
    except Exception:
        return value
Given a value, attempt to coerce it into a UUID (`uuid4`).
def attempt_cast_to_bytes(value: Any) -> Any:
    """
    Try to convert a value into a bytestring.

    Existing `bytes` are returned as-is, null-like values become `None`,
    and values which cannot be deserialized are returned unchanged.
    """
    if isinstance(value, bytes):
        return value

    try:
        if value_is_null(value):
            return None
        return deserialize_bytes_string(str(value))
    except Exception:
        return value
Given a value, attempt to coerce it into a bytestring.
def attempt_cast_to_geometry(value: Any) -> Any:
    """
    Try to convert a value into a `shapely` (`geometry`) object.

    Values whose type name mentions `shapely` are returned as-is.
    Dictionaries and lists are parsed as GeoJSON, and other values are parsed
    as WKT or WKB according to `geometry_is_wkt()`.
    Values which cannot be parsed are returned unchanged.
    """
    shapely, shapely_wkt, shapely_wkb = mrsm.attempt_import(
        'shapely',
        'shapely.wkt',
        'shapely.wkb',
        lazy=False,
    )
    already_geometry = 'shapely' in str(type(value))
    if already_geometry:
        return value

    if isinstance(value, (dict, list)):
        try:
            geojson_str = json.dumps(value)
            return shapely.from_geojson(geojson_str)
        except Exception:
            return value

    is_wkt = geometry_is_wkt(value)
    if is_wkt is None:
        return value

    try:
        if is_wkt:
            return shapely_wkt.loads(value)
        return shapely_wkb.loads(value)
    except Exception:
        return value
Given a value, attempt to coerce it into a `shapely` (`geometry`) object.
def geometry_is_wkt(value: Union[str, bytes]) -> Union[bool, None]:
    """
    Determine whether an input value should be treated as WKT or WKB geometry data.

    Parameters
    ----------
    value: Union[str, bytes]
        The input data to be parsed into geometry data.

    Returns
    -------
    A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
    Return `None` if `value` should be parsed as neither
    (e.g. an empty string or a value which is neither `str` nor `bytes`).
    """
    import re
    ### Raw bytes are always treated as WKB.
    if isinstance(value, bytes):
        return False

    if not isinstance(value, str):
        return None

    wkt_pattern = r'^\s*(POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)\s*\(.*\)\s*$'
    if re.match(wkt_pattern, value, re.IGNORECASE):
        return True

    ### NOTE: An empty string would trivially pass the hex check below but is not WKB.
    if not value:
        return None

    ### A hex-encoded WKB string has an even number of hexadecimal digits.
    if len(value) % 2 == 0 and all(c in '0123456789ABCDEFabcdef' for c in value):
        return False

    return None
Determine whether an input value should be treated as WKT or WKB geometry data.
Parameters
- value (Union[str, bytes]): The input data to be parsed into geometry data.
Returns
- A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
- Return `None` if `value` should be parsed as neither.
def value_is_null(value: Any) -> bool:
    """
    Check whether the lowercased string form of a value is null-like
    (`none`, `nan`, `na`, `nat`, `<na>`, or the empty string).
    """
    null_strings = ('none', 'nan', 'na', 'nat', '', '<na>')
    value_str = str(value).lower()
    return value_str in null_strings
Determine if a value is a null-like string.
def none_if_null(value: Any) -> Any:
    """
    Replace a null-like value (per `value_is_null()`) with `None`;
    return any other value unchanged.
    """
    if value_is_null(value):
        return None
    return value
Return `None` if a value is a null-like string.
def quantize_decimal(x: Decimal, precision: int, scale: int) -> Decimal:
    """
    Round a `Decimal` (half-up) to a fixed scale within a given precision.

    Parameters
    ----------
    x: Decimal
        The `Decimal` to be quantized.

    precision: int
        The total number of significant digits.

    scale: int
        The number of significant digits after the decimal point.

    Returns
    -------
    A `Decimal` quantized to the specified scale and precision.

    Raises
    ------
    A `ValueError` if quantizing `x` raises `InvalidOperation`
    (e.g. the result does not fit within `precision` digits).
    """
    integer_digits = '1' * (precision - scale)
    fractional_digits = '1' * scale
    ### Only the exponent of this template matters to `quantize()`.
    template = Decimal(integer_digits + '.' + fractional_digits)

    try:
        quantized = x.quantize(
            template,
            context=Context(prec=precision),
            rounding=ROUND_HALF_UP,
        )
    except InvalidOperation:
        pass
    else:
        return quantized

    raise ValueError(f"Cannot quantize value '{x}' to {precision=}, {scale=}.")
Quantize a given `Decimal` to a known scale and precision.
Parameters
- x (Decimal): The `Decimal` to be quantized.
- precision (int): The total number of significant digits.
- scale (int): The number of significant digits after the decimal point.
Returns
- A `Decimal` quantized to the specified scale and precision.
def serialize_decimal(
    x: Any,
    quantize: bool = False,
    precision: Optional[int] = None,
    scale: Optional[int] = None,
) -> Any:
    """
    Return a (optionally quantized) string of an input decimal.

    Parameters
    ----------
    x: Any
        The potential decimal to be serialized.

    quantize: bool, default False
        If `True`, quantize the incoming Decimal to the specified scale and precision
        before serialization.

    precision: Optional[int], default None
        The precision of the decimal to be quantized.
        Quantization is skipped if this is `None` or `0`.

    scale: Optional[int], default None
        The scale of the decimal to be quantized.
        A scale of `0` is valid (round to a whole number);
        quantization is only skipped if this is `None`.

    Returns
    -------
    A fixed-point string of the input decimal, `None` for null-like decimals (e.g. `NaN`),
    or the input itself if it is not a `Decimal`.
    """
    if not isinstance(x, Decimal):
        return x

    if value_is_null(x):
        return None

    ### NOTE: Test `scale` against `None` rather than truthiness so that `scale=0` still quantizes.
    if quantize and precision and scale is not None:
        x = quantize_decimal(x, precision, scale)

    return f"{x:f}"
Return a quantized string of an input decimal.
Parameters
- x (Any): The potential decimal to be serialized.
- quantize (bool, default False): If `True`, quantize the incoming Decimal to the specified scale and precision before serialization.
- precision (Optional[int], default None): The precision of the decimal to be quantized.
- scale (Optional[int], default None): The scale of the decimal to be quantized.
Returns
- A string of the input decimal or the input if not a Decimal.
def coerce_timezone(
    dt: Any,
    strip_utc: bool = False,
) -> Any:
    """
    Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
    return a UTC timestamp (strip timezone if `strip_utc` is `True`).

    `None` and `int` inputs are returned untouched, and strings are first parsed
    with `dateutil.parser`.
    """
    if dt is None:
        return None

    if isinstance(dt, int):
        return dt

    if isinstance(dt, str):
        parser = mrsm.attempt_import('dateutil.parser')
        dt = parser.parse(dt)

    ### Series-like objects expose both `dtype` and `__module__`.
    looks_like_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')

    if not looks_like_series:
        ### Scalar path: naive values are assumed to already be in UTC.
        if dt.tzinfo is None:
            return dt if strip_utc else dt.replace(tzinfo=timezone.utc)

        as_utc = dt.astimezone(timezone.utc)
        return as_utc.replace(tzinfo=None) if strip_utc else as_utc

    pandas = mrsm.attempt_import('pandas', lazy=False)

    ### Nothing to do if the series' awareness already matches what was requested.
    if pandas.api.types.is_datetime64_any_dtype(dt):
        is_aware = dt.dt.tz is not None
        if is_aware != bool(strip_utc):
            return dt

    converted = to_datetime(dt, coerce_utc=False)
    if strip_utc:
        ### Best-effort: leave the series as-is if it lacks a usable `.dt` accessor.
        try:
            if converted.dt.tz is not None:
                converted = converted.dt.tz_localize(None)
        except Exception:
            pass

    return converted
Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`, return a UTC timestamp (strip timezone if `strip_utc` is `True`).
def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
    """
    Wrap `pd.to_datetime()` and add support for out-of-bounds values.

    Parameters
    ----------
    dt_val: Any
        The value (scalar or series-like object) to be parsed.
        Objects whose `__module__` contains `'dask'` are parsed with `dask.dataframe`.

    as_pydatetime: bool, default False
        If `True`, call `.to_pydatetime()` on the result.
        Only applied when the vectorized `to_datetime()` parse succeeds.

    coerce_utc: bool, default True
        If `True`, pass values parsed by the `dateutil` fallback through `coerce_timezone()`.

    Returns
    -------
    The parsed datetime value(s). If the vectorized parse fails, each value is parsed with
    `dateutil` instead, and values which still cannot be parsed are returned unchanged.
    """
    pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
    is_dask = 'dask' in getattr(dt_val, '__module__', '')
    dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
    dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
    pd = pandas if dd is None else dd

    try:
        new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
        if as_pydatetime:
            return new_dt_val.to_pydatetime()
        return new_dt_val
    ### NOTE: `OutOfBoundsDatetime` is defined by pandas, so always resolve it from `pandas`
    ### rather than from `pd` (which may be `dask.dataframe`).
    except (pandas.errors.OutOfBoundsDatetime, ValueError):
        ### Deliberately fall through to the element-wise `dateutil` parser below.
        pass

    def parse(x: Any) -> Any:
        ### Best-effort: leave unparseable values untouched.
        try:
            return dateutil_parser.parse(x)
        except Exception:
            return x

    if dt_is_series:
        new_series = dt_val.apply(parse)
        if coerce_utc:
            return coerce_timezone(new_series)
        return new_series

    new_dt_val = parse(dt_val)
    if not coerce_utc:
        return new_dt_val
    return coerce_timezone(new_dt_val)
Wrap `pd.to_datetime()` and add support for out-of-bounds values.
def serialize_bytes(data: bytes) -> str:
    """
    Encode `data` as base64 and return the result as a UTF-8 string.

    A non-`bytes` input which `value_is_null()` considers null-like is returned unchanged.
    """
    import base64

    if isinstance(data, bytes):
        return base64.b64encode(data).decode('utf-8')

    if value_is_null(data):
        return data

    encoded = base64.b64encode(data)
    return encoded.decode('utf-8')
Return the given bytes as a base64-encoded string.
def serialize_geometry(
    geom: Any,
    geometry_format: str = 'wkb_hex',
    as_wkt: bool = False,
) -> Union[str, Dict[str, Any], None]:
    """
    Serialize geometry data into the requested format.

    Parameters
    ----------
    geom: Any
        The potential geometry data to be serialized.

    geometry_format: str, default 'wkb_hex'
        The serialization format for geometry data.
        Accepted formats are `wkb_hex` (well-known binary hex string),
        `wkt` (well-known text), and `geojson`.
        Any value other than `wkb_hex` or `geojson` produces WKT.

    as_wkt: bool, default False
        Accepted as part of the signature but not read by this function.

    Returns
    -------
    `None` for null-like input, a dictionary when `geometry_format` is `geojson`,
    otherwise a string containing the geometry data
    (`str(geom)` if `geom` has no `wkb_hex` attribute).
    """
    if value_is_null(geom):
        return None

    shapely = mrsm.attempt_import('shapely', lazy=False)

    if geometry_format == 'geojson':
        return json.loads(shapely.to_geojson(geom))

    ### Objects without `wkb_hex` are stringified as-is.
    if not hasattr(geom, 'wkb_hex'):
        return str(geom)

    if geometry_format == 'wkb_hex':
        return geom.wkb_hex
    return geom.wkt
Serialize geometry data as a hex-encoded well-known-binary string.
Parameters
- geom (Any): The potential geometry data to be serialized.
- geometry_format (str, default 'wkb_hex'): The serialization format for geometry data. Accepted formats are `wkb_hex` (well-known binary hex string), `wkt` (well-known text), and `geojson`.
Returns
- A string containing the geometry data.
def deserialize_geometry(geom_wkb: Union[str, bytes]):
    """
    Deserialize a WKB string into a shapely geometry object.

    Parameters
    ----------
    geom_wkb: Union[str, bytes]
        The well-known binary data to be parsed.

    Returns
    -------
    The geometry object returned by `shapely.wkb.loads()`.
    """
    ### NOTE: The module name must be passed explicitly to `attempt_import()`.
    ### Import the `wkb` submodule directly rather than relying on it
    ### being exposed as an attribute of the top-level package.
    shapely_wkb = mrsm.attempt_import('shapely.wkb', lazy=False)
    return shapely_wkb.loads(geom_wkb)
Deserialize a WKB string into a shapely geometry object.
def deserialize_bytes_string(data: Optional[str], force_hex: bool = False) -> Union[bytes, None]:
    """
    Decode an ASCII-serialized bytes value (base64 or hex) back into `bytes`.

    Parameters
    ----------
    data: str | None
        The encoded string. Hex input is recognized by a leading `'\\x'` prefix;
        anything else is decoded as base64 unless `force_hex` is set.

    force_hex: bool = False
        If `True`, decode `data` as hex even without the `'\\x'` prefix.
        A leading `'\\x'` prefix is still stripped if present.

    Returns
    -------
    The decoded bytes. A non-string input which `value_is_null()` considers null-like
    is returned unchanged.
    """
    if not isinstance(data, str) and value_is_null(data):
        return data

    import binascii
    import base64

    hex_prefix = '\\x'

    if not (force_hex or data.startswith(hex_prefix)):
        return base64.b64decode(data)

    hex_payload = data[len(hex_prefix):] if data.startswith(hex_prefix) else data
    return binascii.unhexlify(hex_payload)
Given a serialized ASCII string of bytes data, return the original bytes. The input data may either be base64- or hex-encoded.
Parameters
- data (str | None): The string to be deserialized into bytes. May be base64- or hex-encoded (prefixed with `'\x'`).
- force_hex (bool = False): If `True`, treat the input string as hex-encoded. If `data` does not begin with the prefix `'\x'`, set `force_hex` to `True`. This will still strip the leading `'\x'` prefix if present.
Returns
- The original bytes used to produce the encoded string `data`.
def deserialize_base64(data: str) -> bytes:
    """
    Decode a base64-encoded string back into the bytes it represents.
    """
    from base64 import b64decode

    decoded = b64decode(data)
    return decoded
Return the original bytestring from the given base64-encoded string.
629def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> str | None: 630 """ 631 Return the given bytes as a hex string for PostgreSQL's `BYTEA` type. 632 """ 633 import binascii 634 if not isinstance(data, bytes) and value_is_null(data): 635 return data 636 return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')
Return the given bytes as a hex string for PostgreSQL's `BYTEA` type.
def serialize_datetime(dt: datetime) -> Union[str, None]:
    """
    Render a `datetime` as an ISO format string suitable for JSON.

    Naive datetimes get a trailing `'Z'`; aware datetimes keep the offset
    emitted by `isoformat()`. Anything which is not a `datetime` yields `None`.

    Examples
    --------
    >>> import json
    >>> from datetime import datetime
    >>> json.dumps({'a': datetime(2022, 1, 1)}, default=serialize_datetime)
    '{"a": "2022-01-01T00:00:00Z"}'

    """
    if isinstance(dt, datetime):
        iso_text = dt.isoformat()
        if dt.tzinfo is None:
            return iso_text + 'Z'
        return iso_text + ''
    return None
Serialize a datetime object into JSON (ISO format string).
Examples
>>> import json
>>> from datetime import datetime
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
'{"a": "2022-01-01T00:00:00Z"}'
def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
    """
    Convert a value into something JSON can represent.
    Accounts for pipes, connectors, datetimes, bytes, decimals, and shapely geometries.

    Parameters
    ----------
    x: Any
        The value to serialize.

    default_to_str: bool, default True
        If `True`, return `str(x)` when `x` matches none of the handled types.
        Otherwise return `x` unchanged.

    Returns
    -------
    A serialized version of `x`, or `x`:

    - `x.meta` for pipes and connectors
    - the result of `serialize_datetime()` for any object with a `tzinfo` attribute
    - the results of `serialize_bytes()`, `serialize_decimal()` and `serialize_geometry()`
      for bytes, decimals, and shapely objects
    - `None` for null-like values
    """
    meta_types = (mrsm.Pipe, mrsm.connectors.Connector)
    if isinstance(x, meta_types):
        return x.meta

    ### Datetime-like objects are detected by duck-typing on `tzinfo`.
    if hasattr(x, 'tzinfo'):
        return serialize_datetime(x)

    if isinstance(x, bytes):
        return serialize_bytes(x)

    if isinstance(x, Decimal):
        return serialize_decimal(x)

    ### Check the type's name to avoid importing shapely here.
    type_repr = str(type(x))
    if 'shapely' in type_repr:
        return serialize_geometry(x)

    if value_is_null(x):
        return None

    if default_to_str:
        return str(x)
    return x
Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
Parameters
- x (Any): The value to serialize.
- default_to_str (bool, default True): If `True`, return a string of `x` if x is not a designated type. Otherwise return x.
Returns
- A serialized version of x, or x.
def get_geometry_type_srid(
    dtype: str = 'geometry',
    default_type: str = 'geometry',
    default_srid: int = 4326,
) -> Union[Tuple[str, int], Tuple[str, None]]:
    """
    Given the specified geometry `dtype`, return a tuple in the form (type, SRID).

    Parameters
    ----------
    dtype: str, default 'geometry'
        Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`).
        PostGIS-style parentheses and `key=value` parts are also accepted.
        You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:

        - `Point`
        - `LineString`
        - `LinearRing`
        - `Polygon`
        - `MultiPoint`
        - `MultiLineString`
        - `MultiPolygon`
        - `GeometryCollection`

        The names above are matched case-insensitively and returned in this canonical casing.
        Any other type name is returned as given.

    default_type: str, default 'geometry'
        The type to return when `dtype` does not specify one.

    default_srid: int, default 4326
        The SRID to return when `dtype` does not specify one.

    Returns
    -------
    A tuple in the form (type, SRID).
    Defaults to `(default_type, default_srid)`.

    Examples
    --------
    >>> from meerschaum.utils.dtypes import get_geometry_type_srid
    >>> get_geometry_type_srid()
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[]')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[Point, 0]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0, Point]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0]')
    ('geometry', 0)
    >>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
    ('MultiLineString', 4326)
    >>> get_geometry_type_srid('geography')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geography[POINT]')
    ('Point', 4326)
    """
    canonical_types = {
        name.lower(): name
        for name in (
            'Point',
            'LineString',
            'LinearRing',
            'Polygon',
            'MultiPoint',
            'MultiLineString',
            'MultiPolygon',
            'GeometryCollection',
        )
    }

    ### NOTE: PostGIS syntax must also be parsed.
    bracketed_dtype = dtype.replace('(', '[').replace(')', ']')
    _, _, modifier = bracketed_dtype.partition('[')
    modifier = modifier.lstrip('[').rstrip(']').strip()
    if not modifier:
        return default_type, default_srid

    srid = None
    geometry_type = None

    for raw_part in modifier.split(','):
        ### Support `key=value` parts (e.g. `srid=4326`) by keeping only the value.
        part = raw_part.split('=')[-1].strip()
        if not part:
            ### Ignore empty parts so they never become the geometry type.
            continue

        try:
            part_srid = int(part)
        except ValueError:
            ### Only the first non-integer part is used as the geometry type.
            if geometry_type is None:
                geometry_type = canonical_types.get(part.lower(), part)
            continue

        ### Only the first integer part is used as the SRID.
        if srid is None:
            srid = part_srid

    return (
        (geometry_type if geometry_type is not None else default_type),
        (srid if srid is not None else default_srid),
    )
Given the specified geometry `dtype`, return a tuple in the form (type, SRID).
Parameters
- dtype (str, default 'geometry'): Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`). You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:
  - `Point`
  - `LineString`
  - `LinearRing`
  - `Polygon`
  - `MultiPoint`
  - `MultiLineString`
  - `MultiPolygon`
  - `GeometryCollection`
Returns
- A tuple in the form (type, SRID).
- Defaults to `(default_type, default_srid)`.
Examples
>>> from meerschaum.utils.dtypes import get_geometry_type_srid
>>> get_geometry_type_srid()
('geometry', 4326)
>>> get_geometry_type_srid('geometry[]')
('geometry', 4326)
>>> get_geometry_type_srid('geometry[Point, 0]')
('Point', 0)
>>> get_geometry_type_srid('geometry[0, Point]')
('Point', 0)
>>> get_geometry_type_srid('geometry[0]')
('geometry', 0)
>>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
('MultiLineString', 4326)
>>> get_geometry_type_srid('geography')
('geometry', 4326)
>>> get_geometry_type_srid('geography[POINT]')
('Point', 4326)