cattrs_serialization.py (40654B)
import abc
import asyncio
import base64
import builtins
import datetime
import functools
import importlib
import inspect
import json
from decimal import Decimal
from enum import Enum
from functools import cached_property
from functools import lru_cache
from functools import partial
from pathlib import Path
from pathlib import PosixPath
from types import NoneType
from types import UnionType
from typing import Any
from typing import Callable
from typing import ForwardRef
from typing import Hashable
from typing import Mapping
from typing import TypeVar
from typing import Union
from typing import cast
from typing import get_origin
from uuid import UUID

import anyio
import attr
from cachetools import LRUCache
from cattrs import Converter
from cattrs._compat import is_generic
from cattrs.gen import make_dict_unstructure_fn
from cattrs.gen import override
from httpx import URL
from humps import camelize  # pyre-ignore[21]: pyre doesn't understand this import
from pydantic import BaseModel
from pydantic_core import PydanticUndefined

from vet.imbue_core.errors import ImbueError
from vet.imbue_core.fixed_traceback import FixedTraceback
from vet.imbue_core.frozen_utils import FrozenDict
from vet.imbue_core.frozen_utils import FrozenMapping
from vet.imbue_core.serialization import SerializedException
from vet.imbue_core.serialization_types import Serializable

T = TypeVar("T")
TYPE_KEY = "__type"
EXCEPTION_KEY = "__exception"

# LABELS for marking attributes with special handling
DONT_SERIALIZE_METADATA_KEY = "_imbue_dont_serialize"
DONT_SERIALIZE = {DONT_SERIALIZE_METADATA_KEY: True}
SERIALIZE_WITH_DEFAULT_KEY = "_imbue_serialize_with_default"
SERIALIZE_WITH_DEFAULT = {SERIALIZE_WITH_DEFAULT_KEY: True}

SERIALIZABLE_PROPERTY_KEY = "_imbue_is_serializable_property"
CACHED_SERIALIZABLE_PROPERTY_KEY = "_imbue_is_cached_serializable_property"


##########################################################################################
# UTILITY FUNCTIONS
##########################################################################################


def _safe_issubclass(t1: type, t2: type) -> bool:
    """Return whether ``t1`` is a class that subclasses ``t2``.

    A ``t1`` that is not a class (e.g. a generic alias) yields ``False`` instead of
    the ``TypeError`` a bare ``issubclass`` call would raise.
    """
    return inspect.isclass(t1) and issubclass(t1, t2)


def _is_frozen_mapping_type(t: type) -> bool:
    """Return whether ``t`` (or its generic origin, if parametrized) is a ``FrozenMapping``."""
    return _safe_issubclass(get_origin(t) or t, FrozenMapping)


def _is_mapping_type(t: type) -> bool:
    """Return whether ``t`` (or its generic origin, if parametrized) is a ``Mapping``."""
    return _safe_issubclass(get_origin(t) or t, Mapping)


# Mapping types that get dedicated (de)serialization handling instead of the generic mapping path.
_ALLOWED_SPECIAL_MAPPING_TYPES = (LRUCache,)


def _is_special_mapping_type(t: type) -> bool:
    """Return whether ``t`` is one of the specially-handled mapping types."""
    return t in _ALLOWED_SPECIAL_MAPPING_TYPES


def _is_str_type_special_mapping_type(t: str) -> bool:
    """Return whether the fully qualified type name ``t`` names a specially-handled mapping type."""
    return t in [
        _type_to_string(special_type, fully_qualified=True) for special_type in _ALLOWED_SPECIAL_MAPPING_TYPES
    ]


def _is_obj_supported_primitive(obj: Any) -> bool:
    """Return whether ``obj`` is exactly a bool, int, float, str or None (subclasses do not count)."""
    return type(obj) in {bool, int, float, str, NoneType}


def _type_to_string(t: type, fully_qualified: bool) -> str:
    """Return ``t.__name__``, prefixed with ``t.__module__`` when ``fully_qualified`` is true."""
    name = t.__name__
    if fully_qualified:
        return f"{t.__module__}.{name}"
    else:
        return name


def _type_from_string(type_str: str) -> Any:
    """Resolve a type name produced by ``_type_to_string`` back to the named object.

    Anything from the first ``[`` onwards (generic parameters) is discarded. A dotted
    name is split into a module path and an attribute name, and the module is imported;
    an undotted name is looked up in ``builtins``.

    Raises:
        ImportError: if the module path cannot be imported.
        AttributeError: if the module has no attribute with the given name.
    """
    if "[" in type_str:
        class_details, _ = type_str.split("[", 1)
    else:
        class_details = type_str
    if "." in class_details:
        module_path, class_name = class_details.rsplit(".", 1)
        # NOTE(review): this imports whichever module the serialized data names. If type strings
        # can come from untrusted input, that is an arbitrary-import vector -- confirm callers
        # only pass trusted payloads.
        module = importlib.import_module(module_path)
    else:
        class_name = class_details
        module = builtins
    result = getattr(module, class_name)
    return result


def get_serializable_properties(obj: Any) -> dict[str, Any]:
    """Return ``{name: value}`` for every property of ``type(obj)`` marked as serializable.

    The class is inspected for marked members; the values are then read from ``obj`` itself.
    """
    members = inspect.getmembers(type(obj))
    marked_members = {}
    for name, member in members:
        if is_serializable_property(member):
            marked_members[name] = getattr(obj, name)
    return marked_members


def is_serializable_property(func: Callable) -> bool:
    """Return a truthy value if ``func`` was created by one of the serializable-property decorators.

    Cached properties carry the marker on the descriptor itself; plain properties carry it
    on their ``fget`` function.
    """
    return getattr(func, CACHED_SERIALIZABLE_PROPERTY_KEY, False) or (
        isinstance(func, property) and getattr(func.fget, SERIALIZABLE_PROPERTY_KEY, False)
    )


def cached_serializable_property(func: Callable[..., T]) -> cached_property[T]:
    """Decorator: wrap ``func`` in a ``cached_property`` and mark it as serializable."""
    property_to_return = cached_property(func)
    setattr(property_to_return, CACHED_SERIALIZABLE_PROPERTY_KEY, True)
    return property_to_return


def serializable_property(func: Callable[..., T]) -> property:
    """Decorator: mark ``func`` as serializable and wrap it in a ``property``."""
    property_to_return = func
    # NOTE: this will be stored in the fget attribute of the property, which is also the function
    # we are decorating, so we must check in `func.fget` to see if the property is serializable.
    # We need to do it this way because we cannot set the attribute on the property object/wrapper
    # itself, because of the way the inbuilt `property` decorator works.
    setattr(property_to_return, SERIALIZABLE_PROPERTY_KEY, True)
    return property(property_to_return)


def get_dont_serialize_member_names_of_type(obj_type: type) -> list[str]:
    """Return the names of the attrs fields of ``obj_type`` flagged with ``DONT_SERIALIZE``.

    Returns an empty list when ``obj_type`` is not an attrs class.
    """
    if not attr.has(obj_type):
        return []
    return [field.name for field in attr.fields(obj_type) if field.metadata.get(DONT_SERIALIZE_METADATA_KEY, False)]


def get_serialize_with_default_member_names_of_type(
    obj_type: type,
) -> Mapping[str, Any]:
    """Map field names of ``obj_type`` to the default that should be serialized in their place.

    * Pydantic models: every entry of ``model_fields`` is included, mapped to its ``default``
      (``None`` when the field has no default).
    * attrs classes: only fields flagged with ``SERIALIZE_WITH_DEFAULT`` are included, mapped
      to their ``default`` (``None`` when the field has no default).
    * Anything else: an empty mapping.
    """
    # The "no default" markers are sentinels, so they are compared by identity. Using `==`
    # would invoke the default value's own `__eq__`, which may raise or return a non-bool
    # (e.g. for array-like defaults).
    if _safe_issubclass(obj_type, BaseModel):
        model_fields = getattr(obj_type, "model_fields", {})
        return {
            name: None if field.default is PydanticUndefined else field.default for name, field in model_fields.items()
        }
    if not attr.has(obj_type):
        return {}
    return {
        field.name: None if field.default is attr.NOTHING else field.default
        for field in attr.fields(obj_type)
        if field.metadata.get(SERIALIZE_WITH_DEFAULT_KEY, False)
    }


def get_dont_serialize_member_names(obj: Any) -> list[str]:
    """Return the names of the members of ``obj`` that are attrs fields flagged ``DONT_SERIALIZE``.

    Returns an empty list when ``obj`` is not an attrs instance. Names come back in the
    order produced by ``inspect.getmembers``.
    """
    if not attr.has(obj):
        return []
    return [name for name, _ in inspect.getmembers(obj) if is_dont_serialize_member(obj, name)]


def is_dont_serialize_member(obj: Any, member_name: str) -> bool:
    """Return whether ``member_name`` is an attrs field of ``obj`` flagged with ``DONT_SERIALIZE``."""
    if not attr.has(obj):
        return False
    for field in attr.fields(obj.__class__):  # type: ignore
        if field.name == member_name:
            return bool(field.metadata.get(DONT_SERIALIZE_METADATA_KEY, False))
    return False


class SerializationError(ImbueError):
    """Raised when we encounter problems related to Serialization or Deserialization."""


# Type markers whose serialized form is collapsed to its bare "value" in the non-reversible output.
# Computed once here rather than on every (recursive) call below.
_TYPE_KEYS_UNWRAPPED_TO_BARE_VALUE = frozenset(
    {
        _type_to_string(PosixPath, fully_qualified=True),
        _type_to_string(Path, fully_qualified=True),
        _type_to_string(UUID, fully_qualified=True),
    }
)


def _to_json_dumpable_object_without_type_keys(data: Any) -> Any:
    """Recursively strip type markers from unstructured data.

    * A dict tagged as a ``Path``, ``PosixPath`` or ``UUID`` is replaced by its ``"value"``.
    * Any other dict has its ``TYPE_KEY`` entry dropped and its values processed recursively.
    * Lists are processed element-wise; supported primitives are returned unchanged.
    * Everything else is converted with ``str``.
    """
    if isinstance(data, dict):
        if data.get(TYPE_KEY, "") in _TYPE_KEYS_UNWRAPPED_TO_BARE_VALUE:
            return data["value"]
        else:
            return {
                key: _to_json_dumpable_object_without_type_keys(value) for key, value in data.items() if key != TYPE_KEY
            }
    elif isinstance(data, list):
        return [_to_json_dumpable_object_without_type_keys(item) for item in data]
    elif _is_obj_supported_primitive(data):
        return data
    else:
        return str(data)


def _camelize_keys_which_represent_python_names(data: Any) -> Any:
    """Converts JSON-style objects to use camel case keys.

    Takes a JSON-style object produced by unstructuring with the converter and returns the same
    object with certain keys converted to camel case. Camel-cases keys which are derived from
    names of Python attributes and properties.
    Does not camel-case keys which were keys of dictionaries before serialization.

    See cattrs_serialization_test.test_camel_casing for an example.
    """
    if isinstance(data, dict):
        # A dict without a type marker, or one tagged as a Mapping, holds user-provided keys:
        # recurse into the values but leave the keys alone.
        if TYPE_KEY not in data or issubclass(_type_from_string(data[TYPE_KEY]), Mapping):
            return {key: _camelize_keys_which_represent_python_names(value) for key, value in data.items()}
        else:
            # pyre-ignore[16]: pyre doesn't understand the import of camelize
            return {camelize(key): _camelize_keys_which_represent_python_names(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [_camelize_keys_which_represent_python_names(item) for item in data]
    else:
        return data


##########################################################################################
# CLASS-SPECIFIC HOOKS
##########################################################################################


class _ShouldDeserialize:
    """Marker type passed as the structure target when the concrete type must come from the data."""


# FIXME: Types such as LRUCache will always serialize without errors since they inherit from Mapping but will not deserialize correctly.
# We should either document this behavior or change it so that the serialization fails if the type is not supported.
def _serialize_mapping_to_json_dict(data: Mapping, converter: Converter) -> Any:
    """Unstructure a mapping into a plain dict whose keys are the ``str()`` of the unstructured keys.

    No type marker is emitted, so the result cannot be restored to the original mapping type.
    """
    assert _is_mapping_type(type(data)), f"Attempted to serialize object of type {type(data)} as a mapping."
    return {str(converter.unstructure(k)): converter.unstructure(v) for k, v in data.items()}


def _serialize_mapping(data: Mapping, converter: Converter) -> Any:
    """Unstructure a mapping into ``{TYPE_KEY: <type name>, "__entries": [(key, value), ...]}``.

    Storing entries as pairs keeps non-string keys intact.
    """
    assert _is_mapping_type(type(data)), f"Attempted to serialize object of type {type(data)} as a mapping."
    entries = [(converter.unstructure(k), converter.unstructure(v)) for k, v in data.items()]
    return {
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
        "__entries": entries,
    }


def _deserialize_special_mapping_types(data: dict, type_key: str) -> Mapping:
    """Build an instance of a specially-handled mapping type named by ``type_key``.

    For ``LRUCache`` this returns a new, empty cache with ``maxsize=10000``; the contents of
    ``data`` are not restored.

    Raises:
        ValueError: if ``type_key`` does not name a supported special mapping type.
    """
    if type_key == _type_to_string(LRUCache, fully_qualified=True):
        # FIXME: We're not serializing the object correctly and so the deserialization is hacky
        obj: LRUCache = LRUCache(maxsize=10000)
        return obj
    else:
        raise ValueError(f"Unsupported type {type_key}")


def _deserialize_mapping(data: dict, mapping_type: type, converter: Converter) -> Mapping:
    """Structure a serialized mapping back into a ``dict`` (or ``FrozenDict`` for frozen mapping types).

    Accepts both the ``"__entries"`` format written by ``_serialize_mapping`` and the legacy
    format in which the mapping was stored as a plain dictionary. ``data`` is not modified.
    """
    if TYPE_KEY in data and _is_str_type_special_mapping_type(data[TYPE_KEY]):
        return _deserialize_special_mapping_types(data, data[TYPE_KEY])

    out = {}
    if "__entries" in data:
        entries = data["__entries"]
    else:
        # We keep this branch for backwards compatibility with mappings serialized as dictionaries.
        # We do not support Yasoo's DictWithSerializedKeys -- those will need to be migrated to the new format.
        # The type marker is skipped rather than deleted so that the caller's dict is left untouched.
        entries = [(k, v) for k, v in data.items() if k != TYPE_KEY]

    for k, v in entries:
        out[converter.structure(k, _ShouldDeserialize)] = converter.structure(v, _ShouldDeserialize)

    if _is_frozen_mapping_type(mapping_type):
        return FrozenDict(out)
    return out


def _serialize_frozen_set(data: frozenset, converter: Converter) -> dict:
    """Unstructure a frozenset into ``{"value": [...], TYPE_KEY: <type name>}``."""
    assert type(data) is frozenset, f"Attempted to serialize object of type {type(data)} as a frozenset."
    value = converter.unstructure(data, unstructure_as=list)
    return {"value": value, TYPE_KEY: _type_to_string(type(data), fully_qualified=True)}


def _deserialize_frozen_set(data: dict, _: type, converter: Converter) -> frozenset:
    """Rebuild a frozenset from the ``"value"`` list written by ``_serialize_frozen_set``."""
    return frozenset(converter.structure(data["value"], list))


def _serialize_uuid(data: UUID) -> dict:
    """Unstructure a UUID (or a plain string standing in for one) into a tagged dict.

    Raises:
        TypeError: if ``data`` is neither exactly a ``UUID`` nor exactly a ``str``.
    """
    if type(data) is UUID:
        return {
            "value": data.hex,
            TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
        }
    elif type(data) is str:
        return {"value": data, TYPE_KEY: _type_to_string(UUID, fully_qualified=True)}
    else:
        raise TypeError("Tried to serialize " + str(data) + ", which is neither a string nor a UUID, as a UUID.")


def _deserialize_uuid(data: dict[str, str] | str, _: type) -> UUID:
    """Build a UUID from either a tagged dict (its ``"value"``) or a bare string.

    Raises:
        TypeError: if ``data`` is neither a dict nor a str.
    """
    if isinstance(data, dict):
        return UUID(data["value"])
    elif isinstance(data, str):
        return UUID(data)
    else:
        raise TypeError("Tried to deserialize something which is neither a string nor a dictionary, as a UUID.")


def _serialize_tuple(data: tuple, converter: Converter) -> dict:
    """Unstructure a plain tuple into ``{"value": [...], TYPE_KEY: <type name>}``."""
    assert type(data) is tuple, f"Attempted to serialize object of type {type(data)} as a tuple."
    return {
        "value": [converter.unstructure(x) for x in data],
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
    }


def _deserialize_tuple(data: dict, _: type, converter: Converter) -> tuple:
    """Rebuild a tuple, structuring each element according to its own type marker."""
    return tuple(converter.structure(x, _ShouldDeserialize) for x in data["value"])


def _serialize_url(data: URL) -> dict:
    """Unstructure an ``httpx.URL`` into a tagged dict holding its string form."""
    assert type(data) is URL, f"Tried to serialize {data} which is not a URL."
    return {
        "value": str(data),
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
    }


def _deserialize_url(data: dict, _: type) -> URL:
    """Rebuild a ``URL`` from the ``"value"`` of a tagged dict."""
    return URL(data["value"])


def _serialize_decimal(data: Decimal) -> dict:
    """Unstructure a ``Decimal`` into a tagged dict holding its string form."""
    assert type(data) is Decimal, f"Attempted to serialize object of type {type(data)} as a Decimal."
    return {
        "value": str(data),
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
    }


def _deserialize_decimal(data: dict, _: type) -> Decimal:
    """Rebuild a ``Decimal`` from the ``"value"`` of a tagged dict."""
    return Decimal(data["value"])


def _serialize_traceback(data: FixedTraceback) -> dict:
    """Unstructure a ``FixedTraceback`` into a tagged dict holding ``data.to_dict()``."""
    assert _safe_issubclass(
        type(data), FixedTraceback
    ), f"Attempted to serialize object of type {type(data)} as a traceback."
    return {
        "value": data.to_dict(),
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
    }


def _deserialize_traceback(data: dict, _: type) -> FixedTraceback:
    """Rebuild a ``FixedTraceback`` via ``FixedTraceback.from_dict`` on the stored ``"value"``."""
    return FixedTraceback.from_dict(data["value"])


def _serialize_path(data: Path) -> dict:
    """Unstructure a ``pathlib`` path into a tagged dict holding its string form."""
    assert _safe_issubclass(type(data), Path), f"Attempted to serialize an object of type {type(data)} as a Path."
    return {
        "value": str(data),
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
    }


def _deserialize_path(data: Any, _: type) -> Path:
    """Build a ``Path`` from a tagged dict (its ``"value"``) or from the raw data itself."""
    if type(data) is dict:
        return Path(data["value"])
    return Path(data)


def _serialize_anyio_path(data: anyio.Path) -> dict:
    """Unstructure an ``anyio.Path`` into a tagged dict holding its string form."""
    assert _safe_issubclass(type(data), anyio.Path), f"Attempted to serialize an object of type {type(data)} as a Path."
    return {
        "value": str(data),
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
    }


def _deserialize_anyio_path(data: Any, _: type) -> anyio.Path:
    """Build an ``anyio.Path`` from a tagged dict (its ``"value"``) or from the raw data itself."""
    if type(data) is dict:
        return anyio.Path(data["value"])
    return anyio.Path(data)


def _serialize_datetime(data: datetime.datetime) -> dict:
    """Unstructure a datetime into its POSIX timestamp plus a flag recording timezone awareness.

    The value is converted to UTC before the timestamp is taken, so the original timezone
    itself is not recorded -- only whether there was one.
    """
    assert _safe_issubclass(
        type(data), datetime.datetime
    ), f"Attempted to serialize object of type {type(data)} as a datetime."
    return {
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
        "time": data.astimezone(datetime.timezone.utc).timestamp(),
        "tzaware": data.tzinfo is not None,
    }


def _deserialize_datetime(data: dict, _: type) -> datetime.datetime:
    """Rebuild a datetime from its timestamp: UTC-aware if ``"tzaware"`` is truthy, otherwise naive."""
    return datetime.datetime.fromtimestamp(data["time"], datetime.timezone.utc if data.get("tzaware", None) else None)


def _serialize_bytes(data: bytes) -> dict:
    """Unstructure ``bytes`` into a tagged dict holding the base64 text of the payload."""
    assert type(data) is bytes, f"Attempted to serialize object of type {type(data)} as bytes."
    return {
        TYPE_KEY: _type_to_string(type(data), fully_qualified=True),
        # use ascii since base64 guarantees ascii characters only
        "value": base64.b64encode(data).decode("ascii"),
    }


def _deserialize_bytes(data: dict, _: type) -> bytes:
    """Decode the base64 ``"value"`` of a tagged dict back into ``bytes``."""
    return base64.b64decode(data["value"])


def _is_forward_ref(t: type) -> bool:
    """Return whether ``t`` is a ``typing.ForwardRef`` instance."""
    return isinstance(t, ForwardRef)


def _serialize_forward_ref(data: Any, converter: Converter) -> Any:
    """Unstructure a value annotated with a forward reference using its runtime type."""
    return converter.unstructure(data, unstructure_as=type(data))


def _deserialize_forward_ref(data: Any, _: type, converter: Converter) -> Any:
    """Structure a value annotated with a forward reference, taking the type from the data."""
    # TODO: think of a way to evaluate the ForwardRef _, to improve type safety.
    # Once we do that, we can swap out the evaluated type for ShouldDeserialize
    # and enforce that we're getting an object of the correct type.
    return _deserialize_serialized_object(data, _ShouldDeserialize, converter)


def _is_union_type(t: type) -> bool:
    """Return whether ``t`` is a union, written either as ``Union[...]`` or as ``X | Y``."""
    origin = get_origin(t)
    return origin is Union or origin is UnionType


def _deserialize_union_type(data: Any, type_of_data: type, converter: Converter) -> Any:
    """Structure a union-annotated value, taking the type from the data (the annotation is not consulted)."""
    return converter.structure(data, _ShouldDeserialize)


def _serialize_enum(data: Enum, converter: Converter) -> Any:
    """Unstructure an enum member using the converter's built-in enum handling."""
    assert inspect.isclass(type(data)) and issubclass(
        type(data), Enum
    ), f"Attempted to serialize object of type {type(data)} as an Enum."
    return converter._unstructure_enum(data)


def _deserialize_enum(data: dict[str, str] | str, t: type) -> Any:
    """Structure ``data`` into a member of the enum class ``t``.

    Supported representations:

    * a mapping ``{"__type": ..., "value": <member name>}`` (legacy Yasoo format),
    * a string that is the member's value (Cattrs format),
    * a string holding the JSON of the legacy mapping (legacy Yasoo dictionary keys),
    * any other primitive (e.g. an ``int``) that is the member's value.
    """
    # We include this complicated logic to preserve backwards compatibility with old JSON that was
    # serialized by Yasoo. Yasoo serialized enums by converting them into the form
    # {"__type": "...", "value": "..."}. Strangely, Yasoo converted this dictionary into a string
    # whenever an enum value occurred as a dictionary key, but did not convert it into a string
    # when it occurred anywhere else. Hence we need to handle enums that are represented by
    # dictionaries, stringified dictionaries, and strings.

    assert _safe_issubclass(t, Enum)

    if isinstance(data, Mapping):
        # This is the case where data is a dictionary, serialized by Yasoo.
        return t[data["value"]]  # type: ignore

    if isinstance(data, str):
        try:
            # This is the case where data is an enum value, serialized by Cattrs.
            return t(data)
        except ValueError:
            # This is the case where data is a stringified dictionary, serialized by Yasoo.
            data_as_dict = json.loads(data)
            return t[data_as_dict["value"]]  # type: ignore

    # Enums whose values are not strings (e.g. ints) are unstructured to the bare value, so a
    # non-string, non-mapping payload is looked up by value rather than subscripted.
    return t(data)


##########################################################################################
# TYPE KEY LOGIC
##########################################################################################


class _AvoidTypeKeyLogic:
    """Mixin marking a dynamically created subclass whose type-key handling has already been done."""


# NOTE(review): a bare `@lru_cache` keeps only the 128 most recently used entries. If more
# distinct types than that are flagged, evicted ones get a fresh subclass on the next call --
# confirm this is acceptable or consider an unbounded cache.
@lru_cache
def flag_to_ignore_type_key_hooks(t: type) -> type:
    """Return a subclass of ``t`` that also inherits from ``_AvoidTypeKeyLogic``.

    The subclass keeps ``t``'s ``__name__`` and ``__qualname__``. Results are memoized per ``t``.
    """

    class GivenTypeFlaggedToAvoidTypeKeyLogic(t, _AvoidTypeKeyLogic):
        pass

    GivenTypeFlaggedToAvoidTypeKeyLogic.__name__ = t.__name__
    GivenTypeFlaggedToAvoidTypeKeyLogic.__qualname__ = t.__qualname__

    # pyre-fixme[16]: pyre doesn't understand dynamically created classes
    return GivenTypeFlaggedToAvoidTypeKeyLogic


def get_pydantic_model_attributes(model: BaseModel) -> dict[str, Any]:
    """Return ``{field name: current value}`` for the declared fields of a Pydantic model instance."""
    # This is a hack to dump only the top level but also avoid dumping any properties
    attributes = getattr(type(model), "model_fields", {})
    return {a: getattr(model, a) for a in attributes}


# These two factory functions produce the functions for serializing attr classes.
# Only one of them should be registered at a time, depending on whether we are including
# do-not-serialize fields in the serialization.
def _serialize_attr_class_factory(cls: type, converter: Converter) -> Callable[[Any], Any]:
    """Build the plain cattrs dict-unstructure function for ``cls``, with no field overrides."""
    unstructure_fn = make_dict_unstructure_fn(cls, converter)
    return unstructure_fn


def _serialize_attr_class_without_dont_serialize_fields(
    cls: type, converter: Converter, is_camel_case: bool
) -> Callable[[Any], Any]:
    """Build a dict-unstructure function for ``cls`` that omits fields flagged ``DONT_SERIALIZE``.

    ``is_camel_case`` is accepted but not used here.
    """
    field_overrides = {}
    for field_name in get_dont_serialize_member_names_of_type(cls):
        field_overrides[field_name] = override(omit=True)
    return make_dict_unstructure_fn(cls, converter, **field_overrides)  # type: ignore


def _serialize_with_type_key(data: Any, converter: Converter, for_javascript: bool = False) -> Any:
    """Unstructure ``data`` and tag the resulting dict with the fully qualified name of its type.

    Primitives, lists and tuples are unstructured as their own runtime type instead.
    Pydantic models are dumped with ``model_dump``; everything else goes through the converter
    using a flagged subclass so that this hook is not re-entered. When ``for_javascript`` is
    set, the object's serializable properties are added to the output as well.
    """
    concrete_type = type(data)

    # This means that data was annotated as a Serializable, but it is a primitive or a tuple.
    if _is_obj_supported_primitive(data) or isinstance(data, (list, tuple)):
        return converter.unstructure(data, unstructure_as=concrete_type)

    flagged_type = flag_to_ignore_type_key_hooks(concrete_type)  # type: ignore

    # This is a hack which is necessary because cattrs does not work well with Protocols.
    # Protocols are generic classes, but they don't have __orig_bases__, which cattrs
    # assumes them to have.
    if is_generic(flagged_type):
        setattr(flagged_type, "__orig_bases__", getattr(flagged_type, "__orig_bases__", ()))

    if not isinstance(data, BaseModel):
        payload = converter.unstructure(data, unstructure_as=flagged_type)
    else:
        # This is a shortcut: when you encounter a Pydantic model, just use Pydantic serialization.
        # NOTE: currently we don't support `DONT_SERIALIZE` fields in pydantic models.
        # so we just serialize all fields.
        payload = data.model_dump(by_alias=for_javascript, mode="json")

    assert isinstance(payload, dict)

    if for_javascript:
        for property_name, property_value in get_serializable_properties(data).items():
            payload[property_name] = converter.unstructure(property_value)

    # The type marker goes first; the unstructured fields follow in their own order.
    tagged = {TYPE_KEY: _type_to_string(concrete_type, fully_qualified=True)}
    tagged.update(payload)
    return tagged


def _is_serializable_class(t: type) -> bool:
    """Return whether ``t`` is a Serializable subclass, an attrs class, or a Pydantic model class."""
    return _safe_issubclass(t, Serializable) or attr.has(t) or _safe_issubclass(t, BaseModel)


# This is the predicate used in the factory functions above, so they trigger for serializable and attr classes
# that have had their type key logic handled.
def _should_serialize_without_type_key(t: type) -> bool:
    """Return whether ``t`` is a serializable class that has already been flagged with ``_AvoidTypeKeyLogic``."""
    return _is_serializable_class(t) and _safe_issubclass(t, _AvoidTypeKeyLogic)


def _should_add_type_key(t: type) -> bool:
    """Return whether ``t`` is a serializable class that has not been flagged with ``_AvoidTypeKeyLogic``."""
    return _is_serializable_class(t) and not _safe_issubclass(t, _AvoidTypeKeyLogic)


def _deserialize_serialized_object(data: Any, type_of_data: type, converter: Converter) -> Any:
    """Structure ``data`` by shape: lists element-wise, mappings via their type marker, the rest as-is."""
    if isinstance(data, Mapping):
        # Data is a dictionary with a type key, representing an attrs object, a Pydantic model, or a Mapping
        return _deserialize_using_type_marker(data, type_of_data, converter)
    if isinstance(data, list):
        # Data is a list of objects.
        return converter.structure(data, list[_ShouldDeserialize])
    # Data is a primitive, like an integer or a string.
    return converter.structure(data, type(data))


def _should_deserialize_with_type_key_logic(t: type) -> bool:
    """Return whether structuring into ``t`` should go through the type-marker logic.

    True for attrs classes, ``Serializable`` / ``_ShouldDeserialize`` / ``BaseModel`` subclasses,
    ``Hashable`` and mapping types -- unless ``t`` or its generic origin is flagged with
    ``_AvoidTypeKeyLogic``.
    """
    wants_type_key_logic = (
        attr.has(t)
        or _safe_issubclass(t, Serializable)
        or _safe_issubclass(t, _ShouldDeserialize)
        or t is Hashable
        or _is_mapping_type(t)
        or _safe_issubclass(t, BaseModel)
    )
    if not wants_type_key_logic:
        return False

    if _safe_issubclass(t, _AvoidTypeKeyLogic):
        return False
    return not _safe_issubclass(get_origin(t) or NoneType, _AvoidTypeKeyLogic)


def deserialized_object_violates_target_type(obj: Any, target_type: type) -> bool:
    """Return whether ``obj`` fails an ``isinstance`` check against ``target_type``.

    ``_ShouldDeserialize``, ``Serializable`` and TypeVars are never considered violated.
    For parametrized generics the check is made against the generic origin.
    """
    accepts_anything = target_type is _ShouldDeserialize or target_type is Serializable
    # We're not really able to check if the object is an instance of a type that's behind a TypeVar.
    if accepts_anything or type(target_type) is TypeVar:
        return False
    runtime_type = get_origin(target_type) or target_type
    return not isinstance(obj, runtime_type)


# Note that expected_type_based_on_annotations may be much more vague than the actual type of the object.
# For example: it may be Serializable, when the object is supposed to be
# deserialized as a HammerResult. We get the real type from the "__type" key.
612 def _deserialize_using_type_marker( 613 obj: Mapping[Any, Any], 614 expected_type_based_on_annotations: type[T], 615 converter: Converter, 616 ) -> T: 617 if TYPE_KEY in obj: 618 type_of_obj = _type_from_string(obj[TYPE_KEY]) 619 else: 620 type_of_obj = expected_type_based_on_annotations 621 622 if _is_special_mapping_type(type_of_obj): 623 pass 624 elif _is_frozen_mapping_type(type_of_obj): 625 obj.pop(TYPE_KEY, None) # type: ignore 626 type_of_obj = FrozenMapping[_ShouldDeserialize, _ShouldDeserialize] 627 elif _is_mapping_type(type_of_obj): 628 obj.pop(TYPE_KEY, None) # type: ignore 629 type_of_obj = dict[_ShouldDeserialize, _ShouldDeserialize] 630 elif _safe_issubclass(type_of_obj, BaseModel): 631 assert isinstance(obj, dict) 632 obj.pop(TYPE_KEY, None) 633 return cast(T, type_of_obj.model_validate(obj)) 634 elif not attr.has(type_of_obj): 635 # This happens when there is a primitive object which is annotated as Serializable. 636 return converter.structure(obj, type_of_obj) # type: ignore 637 638 # By mixing in the "avoid type key logic" class, force cattrs to do its normal behavior. 639 ret: T = converter.structure(obj, flag_to_ignore_type_key_hooks(type_of_obj)) 640 641 if inspect.isclass(type_of_obj): 642 # Upcast the result so that it has the correct type again, without the mixin. 
643 object.__setattr__(ret, "__class__", type_of_obj) 644 645 if deserialized_object_violates_target_type(ret, expected_type_based_on_annotations): 646 raise TypeError( 647 f"Tried to deserialize into type {expected_type_based_on_annotations}, but got object of type {type(ret)}" 648 ) 649 650 return ret 651 652 653 def _resolve_default(default: Any) -> Any: 654 if isinstance(default, attr.Factory): # type: ignore 655 return default.factory() 656 return default 657 658 659 def _serialize_with_defaults(cls: type, converter: Converter) -> Callable[[Any], Any]: 660 # Handle a pydantic model 661 if _safe_issubclass(cls, BaseModel): 662 return lambda x: {k: converter.unstructure(v) for k, v in get_pydantic_model_attributes(x).items()} 663 664 members_with_defaults = get_serialize_with_default_member_names_of_type(cls) 665 overriden_kwargs = { 666 name: override(unstruct_hook=(lambda _, value=_resolve_default(default): value)) # type: ignore 667 for name, default in members_with_defaults.items() 668 } 669 return make_dict_unstructure_fn(cls, converter, **overriden_kwargs) # type: ignore 670 671 672 def _should_serialize_as_serialized_exception(t: type) -> bool: 673 return ( 674 _safe_issubclass(get_origin(t) or t, BaseException) and not attr.has(t) and not _safe_issubclass(t, BaseModel) 675 ) 676 677 678 ########################################################################################## 679 # CONVERTER FACTORY 680 ########################################################################################## 681 682 683 class _ConverterFactory: 684 """Factory for creating converters with different configurations. 685 686 e.g. for serializing to javascript, or python, or to include do-not-serialize fields. 687 """ 688 689 def build_base_converter(self) -> Converter: 690 # Builds of new base converter object, which registers all the hooks that are common to all converters. 
691 # The idea being that all new converters start from this base and then override hooks they need to change 692 # NOTE: we need to generate a new converter object for each independent concrete converter (as opposed to 693 # using converter.copy()) since we use partial functions/closures and this way we ensure the function is 694 # being called with the correct converter object. 695 converter = Converter() 696 697 converter.register_structure_hook_func(_is_mapping_type, partial(_deserialize_mapping, converter=converter)) 698 # serialization of mapping types depends on the specific converter so is done in the get_converter factory method 699 700 converter.register_unstructure_hook(frozenset, partial(_serialize_frozen_set, converter=converter)) 701 converter.register_structure_hook(frozenset, partial(_deserialize_frozen_set, converter=converter)) 702 703 converter.register_unstructure_hook(UUID, _serialize_uuid) 704 converter.register_structure_hook(UUID, _deserialize_uuid) 705 706 converter.register_unstructure_hook(URL, _serialize_url) 707 converter.register_structure_hook(URL, _deserialize_url) 708 709 converter.register_unstructure_hook(Decimal, _serialize_decimal) 710 converter.register_structure_hook(Decimal, _deserialize_decimal) 711 712 converter.register_unstructure_hook(FixedTraceback, _serialize_traceback) 713 converter.register_structure_hook(FixedTraceback, _deserialize_traceback) 714 715 converter.register_unstructure_hook(Path, _serialize_path) 716 converter.register_structure_hook(Path, _deserialize_path) 717 718 converter.register_unstructure_hook(anyio.Path, _serialize_anyio_path) 719 converter.register_structure_hook(anyio.Path, _deserialize_anyio_path) 720 721 converter.register_unstructure_hook(datetime.datetime, _serialize_datetime) 722 converter.register_structure_hook(datetime.datetime, _deserialize_datetime) 723 724 converter.register_unstructure_hook(bytes, _serialize_bytes) 725 converter.register_structure_hook(bytes, _deserialize_bytes) 
726 727 converter.register_unstructure_hook(PosixPath, _serialize_path) 728 converter.register_structure_hook(PosixPath, _deserialize_path) 729 730 converter.register_unstructure_hook_func(_is_forward_ref, partial(_serialize_forward_ref, converter=converter)) 731 converter.register_structure_hook_func(_is_forward_ref, partial(_deserialize_forward_ref, converter=converter)) 732 733 converter.register_structure_hook_func(_is_union_type, partial(_deserialize_union_type, converter=converter)) 734 735 converter.register_structure_hook(NoneType, lambda data, _: None) 736 737 converter.register_unstructure_hook(Enum, partial(_serialize_enum, converter=converter)) 738 converter.register_structure_hook(Enum, _deserialize_enum) 739 740 converter.register_unstructure_hook_func( 741 _should_serialize_as_serialized_exception, 742 lambda e: serialize_to_dict( 743 SerializedException.build(e), 744 use_defaults_for_unserializable_fields=True, 745 ), 746 ) 747 748 converter.register_structure_hook_func( 749 _should_deserialize_with_type_key_logic, 750 partial(_deserialize_serialized_object, converter=converter), 751 ) 752 753 converter.register_structure_hook_func( 754 lambda t: isinstance(t, TypeVar), 755 partial(_deserialize_serialized_object, converter=converter), 756 ) 757 758 return converter 759 760 def get_converter_with_defaults(self, converter: Converter) -> Converter: 761 converter.register_unstructure_hook(asyncio.Lock, lambda _: None) 762 converter.register_structure_hook(asyncio.Lock, lambda data, _: asyncio.Lock()) 763 764 converter.register_unstructure_hook(asyncio.Task, lambda _: None) 765 converter.register_structure_hook(asyncio.Task, lambda data, _: None) 766 767 converter.register_unstructure_hook(asyncio.Queue, lambda _: None) 768 converter.register_structure_hook(asyncio.Queue, lambda data, _: None) 769 770 converter.register_unstructure_hook(asyncio.Event, lambda _: None) 771 converter.register_structure_hook(asyncio.Event, lambda data, _: None) 772 773 
converter.register_unstructure_hook(asyncio.Semaphore, lambda _: None) 774 converter.register_structure_hook(asyncio.Semaphore, lambda data, _: None) 775 776 converter.register_unstructure_hook(abc.ABCMeta, lambda _: None) 777 converter.register_structure_hook(abc.ABCMeta, lambda data, _: None) 778 converter.register_unstructure_hook_factory( 779 _should_serialize_without_type_key, 780 partial(_serialize_with_defaults, converter=converter), 781 ) 782 783 return converter 784 785 @functools.cache 786 def get_converter( 787 self, 788 for_javascript: bool = False, 789 exclude_dont_serialize_fields: bool = False, 790 use_defaults_for_unserializable_fields: bool = False, 791 ) -> Converter: 792 """Returns a converter with the given configuration. 793 794 The result of this method is cached, so subsequent calls with the same arguments will return the same converter. 795 """ 796 assert not ( 797 exclude_dont_serialize_fields and use_defaults_for_unserializable_fields 798 ), f"Expected exactly one flag to be set, got {exclude_dont_serialize_fields=}, {use_defaults_for_unserializable_fields=}" 799 800 converter = self.build_base_converter() 801 if for_javascript: 802 converter.register_unstructure_hook_func( 803 _is_mapping_type, 804 partial(_serialize_mapping_to_json_dict, converter=converter), 805 ) 806 else: 807 converter.register_unstructure_hook_func(_is_mapping_type, partial(_serialize_mapping, converter=converter)) 808 converter.register_unstructure_hook(tuple, partial(_serialize_tuple, converter=converter)) 809 converter.register_structure_hook(tuple, partial(_deserialize_tuple, converter=converter)) 810 811 if exclude_dont_serialize_fields: 812 converter.register_unstructure_hook_factory( 813 _should_serialize_without_type_key, 814 partial( 815 _serialize_attr_class_without_dont_serialize_fields, 816 converter=converter, 817 is_camel_case=for_javascript, 818 ), 819 ) 820 else: 821 converter.register_unstructure_hook_factory( 822 _should_serialize_without_type_key, 
823 partial(_serialize_attr_class_factory, converter=converter), 824 ) 825 if use_defaults_for_unserializable_fields: 826 converter = self.get_converter_with_defaults(converter) 827 828 converter.register_unstructure_hook_func( 829 _should_add_type_key, 830 partial( 831 _serialize_with_type_key, 832 converter=converter, 833 for_javascript=for_javascript, 834 ), 835 ) 836 837 return converter 838 839 840 CONVERTER_FACTORY = _ConverterFactory() 841 842 843 ########################################################################################## 844 # ENTRY POINTS 845 ########################################################################################## 846 847 848 def _serialize_to_json_dumpable_object( 849 obj: Any, 850 is_reversible: bool = True, 851 for_javascript: bool = False, 852 exclude_dont_serialize_fields: bool = False, 853 use_defaults_for_unserializable_fields: bool = False, 854 ) -> Any: 855 if exclude_dont_serialize_fields: 856 # Check and raise error to make it clear to the caller that the object cannot be deserialized. 857 # This is a sanity check, to make it easier to debug when using do-not-serialize fields. 858 # NOTE: this will only catch cases where non-serializable fields are in obj, but not cases where 859 # the non-serializable fields are in nested objects, checking for the nested case is a little complicated 860 # so we don't do it basically. 861 assert ( 862 not is_reversible 863 ), "Cannot deserialize object when excluding do-not-serialize fields (i.e. when `exclude_dont_serialize_fields=True`). If you want to serialize an object and exclude do-not-serialize fields, make sure to set `is_reversible=False`." 864 865 if use_defaults_for_unserializable_fields: 866 # The point of the use_defaults_for_unserializable_fields flag is to make it possible to serialize objects 867 # and then recreate them later even if certain fields are not fully saved. 
We never want to use this flag 868 # with `is_reversible=False` since we won't know the type to be able to recreate the object. 869 assert is_reversible, "Cannot restructure inputs if is_reversible=False" 870 871 # TODO: this is a hack to make it possible to serialize ExecutionContexts for class methods. 872 # This lets us serialize ExecutionContexts for calls to class methods without serializing the class itself. 873 # The long-term solution is to write a custom hook that can serialize type objects. 874 if type(obj) is dict and "__class__" in obj: 875 del obj["__class__"] 876 877 converter = CONVERTER_FACTORY.get_converter( 878 for_javascript=for_javascript, 879 exclude_dont_serialize_fields=exclude_dont_serialize_fields, 880 use_defaults_for_unserializable_fields=use_defaults_for_unserializable_fields, 881 ) 882 883 dict_result = converter.unstructure(obj) 884 if for_javascript: 885 dict_result = _camelize_keys_which_represent_python_names(dict_result) 886 887 if not is_reversible: 888 return _to_json_dumpable_object_without_type_keys(dict_result) 889 890 return dict_result 891 892 893 def serialize_to_dict( 894 obj: Any, 895 is_reversible: bool = True, 896 for_javascript: bool = False, 897 exclude_dont_serialize_fields: bool = False, 898 use_defaults_for_unserializable_fields: bool = False, 899 ) -> dict[str, Any]: 900 """Serialize to a python dict.""" 901 return cast( 902 dict[str, Any], 903 _serialize_to_json_dumpable_object( 904 obj, 905 is_reversible=is_reversible, 906 for_javascript=for_javascript, 907 exclude_dont_serialize_fields=exclude_dont_serialize_fields, 908 use_defaults_for_unserializable_fields=use_defaults_for_unserializable_fields, 909 ), 910 ) 911 912 913 def serialize_to_json( 914 obj: Any, 915 indent: int | None = None, 916 sort_keys: bool = False, 917 is_reversible: bool = True, 918 for_javascript: bool = False, 919 exclude_dont_serialize_fields: bool = False, 920 use_defaults_for_unserializable_fields: bool = False, 921 ) -> str: 922 
"""Serialize an object to a JSON string. 923 924 This is the main serialization entrypoint. 925 926 `is_reversible` controls whether we enforce that the result can be deserialized. In some cases we don't care about 927 reversibility, e.g. when serializing data for a frontend we often don't care whether we can deserialize. 928 929 `for_javascript` controls whether we use camelCase for keys that originally were Python identifiers. 930 931 `exclude_dont_serialize_fields` controls whether we include do-not-serialize fields in the serialization. 932 If this is `False` then any attr class fields marked with as don't serialize, e.g. with `attr.ib(metadata=DONT_SERIALIZE)`, 933 will still be included in the serialization. If this is `True` then they will be excluded, however this also means that 934 the result will not be reversible (and thus the caller will have to set `is_reversible=False`). 935 936 `use_defaults_for_unserializable_fields` controls whether we fill fields that cannot be serialized with their default values. 937 IMPORTANT: If you use this flag, data may be discarded during deserialization. 938 The goal is to be able to deserialize fields to the original type without caring about the data contained. 939 Default value choices (guided by crafty serialization requirements): 940 - Fields that are marked with attr.ib(metadata=SERIALIZE_WITH_DEFAULT) have the following default values: 941 - Fields that are marked with `attr.ib(default=...)` or `attr.ib(factory=...)` use their default values. 942 - Fields that do not have a default value are filled with None. 943 - Asyncio objects are filled with None. 
944 - Exceptions are replaced with a string representation 945 """ 946 try: 947 unstructured = _serialize_to_json_dumpable_object( 948 obj, 949 is_reversible=is_reversible, 950 for_javascript=for_javascript, 951 exclude_dont_serialize_fields=exclude_dont_serialize_fields, 952 use_defaults_for_unserializable_fields=use_defaults_for_unserializable_fields, 953 ) 954 return json.dumps(unstructured, indent=indent, sort_keys=sort_keys) 955 except Exception as e: 956 raise SerializationError(str(e)) from e 957 958 959 def deserialize_from_json( 960 data: str, 961 for_javascript: bool = False, 962 exclude_dont_serialize_fields: bool = False, 963 use_defaults_for_unserializable_fields: bool = False, 964 ) -> Any: 965 try: 966 converter = CONVERTER_FACTORY.get_converter( 967 for_javascript=for_javascript, 968 exclude_dont_serialize_fields=exclude_dont_serialize_fields, 969 use_defaults_for_unserializable_fields=use_defaults_for_unserializable_fields, 970 ) 971 return _deserialize_serialized_object(json.loads(data), _ShouldDeserialize, converter=converter) 972 except Exception as e: 973 raise SerializationError(str(e)) from e 974 975 976 def deserialize_from_dict( 977 data: dict[str, Any], 978 as_type: type = _ShouldDeserialize, 979 for_javascript: bool = False, 980 exclude_dont_serialize_fields: bool = False, 981 use_defaults_for_unserializable_fields: bool = False, 982 ) -> Any: 983 try: 984 converter = CONVERTER_FACTORY.get_converter( 985 for_javascript=for_javascript, 986 exclude_dont_serialize_fields=exclude_dont_serialize_fields, 987 use_defaults_for_unserializable_fields=use_defaults_for_unserializable_fields, 988 ) 989 return _deserialize_using_type_marker(data, as_type, converter=converter) 990 except Exception as e: 991 raise SerializationError(str(e)) from e