Module chompjs
Expand source code
# -*- coding: utf-8 -*-
import json
import warnings
from _chompjs import parse, parse_objects
def _preprocess(string, unicode_escape=False):
if unicode_escape:
string = string.encode().decode("unicode_escape")
return string
def _process_loader_arguments(loader_args, loader_kwargs, json_params):
if json_params:
msg = "json_params argument is deprecated, please use loader_kwargs instead"
warnings.warn(msg, DeprecationWarning)
loader_kwargs = json_params
if not loader_args:
loader_args = []
if not loader_kwargs:
loader_kwargs = {}
return (loader_args, loader_kwargs)
def parse_js_object(
string,
unicode_escape=False,
loader=json.loads,
loader_args=None,
loader_kwargs=None,
json_params=None,
):
"""
Extracts first JSON object encountered in the input string
Parameters
----------
string: str
Input string
>>> parse_js_object("{a: 100}")
{'a': 100}
unicode_escape: bool, optional
Attempt to fix input string if it contains escaped special characters
>>> parse_js_object('{\\\\"a\\\\": 100}')
{'\\\\"a\\\\"': 100}
>>> parse_js_object('{\\\\"a\\\\": 100}', unicode_escape=True)
{'a': 100}
loader: func, optional
Function used to load processed input data. By default `json.loads` is used
>>> import orjson
>>> import chompjs
>>>
>>> chompjs.parse_js_object("{'a': 12}", loader=orjson.loads)
{'a': 12}
loader_args: list, optional
Allow passing down positional arguments to loader function
loader_kwargs: dict, optional
Allow passing down keyword arguments to loader function
>>> parse_js_object("{'a': 10.1}")
{'a': 10.1}
>>> import decimal
>>> parse_js_object("{'a': 10.1}", loader_kwargs={'parse_float': decimal.Decimal})
{'a': Decimal('10.1')}
.. deprecated:: 1.3.0
json_params: dict, optional
Use `loader_kwargs` instead
Returns
-------
list | dict
Extracted JSON object
Raises
------
ValueError
If failed to parse input properly
```python
>>> parse_js_object(None)
Traceback (most recent call last):
...
ValueError: Invalid input
>>> parse_js_object("No JSON objects in sight...")
Traceback (most recent call last):
...
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
```
"""
if not string:
raise ValueError("Invalid input")
loader_args, loader_kwargs = _process_loader_arguments(
loader_args, loader_kwargs, json_params
)
if json_params:
msg = "json_params argument is deprecated, please use loader_kwargs instead"
warnings.warn(msg, DeprecationWarning)
string = _preprocess(string, unicode_escape)
parsed_data = parse(string)
return loader(parsed_data, *loader_args, **loader_kwargs)
def parse_js_objects(
string,
unicode_escape=False,
omitempty=False,
loader=json.loads,
loader_args=None,
loader_kwargs=None,
json_params=None,
):
"""
Returns a generator extracting all JSON objects encountered in the input string.
Can be used to read JSON Lines
Parameters
----------
string: str
Input string
>>> it = parse_js_objects("{a: 100} {b: 100}")
>>> next(it)
{'a': 100}
>>> next(it)
{'b': 100}
unicode_escape: bool, optional
Attempt to fix input string if it contains escaped special characters
>>> next(parse_js_objects('{\\\\"a\\\\": 100}'))
{'\\\\"a\\\\"': 100}
>>> next(parse_js_objects('{\\\\"a\\\\": 100}', unicode_escape=True))
{'a': 100}
omitempty: bool, optional
Skip empty dictionaries and lists
>>> list(parse_js_objects("{a: 12} {} {b: 13}"))
[{'a': 12}, {}, {'b': 13}]
>>> list(parse_js_objects("{a: 12} {} {b: 13}", omitempty=True))
[{'a': 12}, {'b': 13}]
loader: func, optional
Function used to load processed input data. By default `json.loads` is used
>>> import orjson
>>> import chompjs
>>>
>>> next(chompjs.parse_js_objects("{'a': 12}", loader=orjson.loads))
{'a': 12}
loader_args: list, optional
Allow passing down positional arguments to loader function
loader_kwargs: dict, optional
Allow passing down keyword arguments to loader function
>>> next(parse_js_objects("{'a': 10.1}"))
{'a': 10.1}
>>> import decimal
>>> next(parse_js_objects("{'a': 10.1}", loader_kwargs={'parse_float': decimal.Decimal}))
{'a': Decimal('10.1')}
.. deprecated:: 1.3.0
json_params: dict, optional
Use `loader_kwargs` instead
Returns
-------
generator
Iterating over it yields all encountered JSON objects
"""
if not string:
return
loader_args, loader_kwargs = _process_loader_arguments(
loader_args, loader_kwargs, json_params
)
string = _preprocess(string, unicode_escape)
for raw_data in parse_objects(string):
try:
data = loader(raw_data, *loader_args, **loader_kwargs)
except ValueError:
continue
if not data and omitempty:
continue
yield data
Functions
def parse_js_object(string, unicode_escape=False, loader=<function loads>, loader_args=None, loader_kwargs=None, json_params=None)
-
Extracts first JSON object encountered in the input string
Parameters
string
:str
- Input string
>>> parse_js_object("{a: 100}") {'a': 100}
unicode_escape
:bool
, optional- Attempt to fix input string if it contains escaped special characters
>>> parse_js_object('{\\"a\\": 100}') {'\\"a\\"': 100} >>> parse_js_object('{\\"a\\": 100}', unicode_escape=True) {'a': 100}
loader
:func
, optional- Function used to load processed input data. By default
json.loads
is used
>>> import orjson >>> import chompjs >>> >>> chompjs.parse_js_object("{'a': 12}", loader=orjson.loads) {'a': 12}
loader_args
:list
, optional- Allow passing down positional arguments to loader function
loader_kwargs
:dict
, optional- Allow passing down keyword arguments to loader function
>>> parse_js_object("{'a': 10.1}") {'a': 10.1} >>> import decimal >>> parse_js_object("{'a': 10.1}", loader_kwargs={'parse_float': decimal.Decimal}) {'a': Decimal('10.1')}
Deprecated since version: 1.3.0
json_params
:dict
, optional- Use
loader_kwargs
instead
Returns
list | dict
- Extracted JSON object
Raises
ValueError
- If failed to parse input properly
>>> parse_js_object(None) Traceback (most recent call last): ... ValueError: Invalid input >>> parse_js_object("No JSON objects in sight...") Traceback (most recent call last): ... json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Expand source code
def parse_js_object( string, unicode_escape=False, loader=json.loads, loader_args=None, loader_kwargs=None, json_params=None, ): """ Extracts first JSON object encountered in the input string Parameters ---------- string: str Input string >>> parse_js_object("{a: 100}") {'a': 100} unicode_escape: bool, optional Attempt to fix input string if it contains escaped special characters >>> parse_js_object('{\\\\"a\\\\": 100}') {'\\\\"a\\\\"': 100} >>> parse_js_object('{\\\\"a\\\\": 100}', unicode_escape=True) {'a': 100} loader: func, optional Function used to load processed input data. By default `json.loads` is used >>> import orjson >>> import chompjs >>> >>> chompjs.parse_js_object("{'a': 12}", loader=orjson.loads) {'a': 12} loader_args: list, optional Allow passing down positional arguments to loader function loader_kwargs: dict, optional Allow passing down keyword arguments to loader function >>> parse_js_object("{'a': 10.1}") {'a': 10.1} >>> import decimal >>> parse_js_object("{'a': 10.1}", loader_kwargs={'parse_float': decimal.Decimal}) {'a': Decimal('10.1')} .. deprecated:: 1.3.0 json_params: dict, optional Use `loader_kwargs` instead Returns ------- list | dict Extracted JSON object Raises ------ ValueError If failed to parse input properly ```python >>> parse_js_object(None) Traceback (most recent call last): ... ValueError: Invalid input >>> parse_js_object("No JSON objects in sight...") Traceback (most recent call last): ... json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) ``` """ if not string: raise ValueError("Invalid input") loader_args, loader_kwargs = _process_loader_arguments( loader_args, loader_kwargs, json_params ) if json_params: msg = "json_params argument is deprecated, please use loader_kwargs instead" warnings.warn(msg, DeprecationWarning) string = _preprocess(string, unicode_escape) parsed_data = parse(string) return loader(parsed_data, *loader_args, **loader_kwargs)
def parse_js_objects(string, unicode_escape=False, omitempty=False, loader=<function loads>, loader_args=None, loader_kwargs=None, json_params=None)
-
Returns a generator extracting all JSON objects encountered in the input string. Can be used to read JSON Lines
Parameters
string
:str
- Input string
>>> it = parse_js_objects("{a: 100} {b: 100}") >>> next(it) {'a': 100} >>> next(it) {'b': 100}
unicode_escape
:bool
, optional- Attempt to fix input string if it contains escaped special characters
>>> next(parse_js_objects('{\\"a\\": 100}')) {'\\"a\\"': 100} >>> next(parse_js_objects('{\\"a\\": 100}', unicode_escape=True)) {'a': 100}
omitempty
:bool
, optional- Skip empty dictionaries and lists
>>> list(parse_js_objects("{a: 12} {} {b: 13}")) [{'a': 12}, {}, {'b': 13}] >>> list(parse_js_objects("{a: 12} {} {b: 13}", omitempty=True)) [{'a': 12}, {'b': 13}]
loader
:func
, optional- Function used to load processed input data. By default
json.loads
is used
>>> import orjson >>> import chompjs >>> >>> next(chompjs.parse_js_objects("{'a': 12}", loader=orjson.loads)) {'a': 12}
loader_args
:list
, optional- Allow passing down positional arguments to loader function
loader_kwargs
:dict
, optional- Allow passing down keyword arguments to loader function
>>> next(parse_js_objects("{'a': 10.1}")) {'a': 10.1} >>> import decimal >>> next(parse_js_objects("{'a': 10.1}", loader_kwargs={'parse_float': decimal.Decimal})) {'a': Decimal('10.1')}
Deprecated since version: 1.3.0
json_params
:dict
, optional- Use
loader_kwargs
instead
Returns
generator
- Iterating over it yields all encountered JSON objects
Expand source code
def parse_js_objects( string, unicode_escape=False, omitempty=False, loader=json.loads, loader_args=None, loader_kwargs=None, json_params=None, ): """ Returns a generator extracting all JSON objects encountered in the input string. Can be used to read JSON Lines Parameters ---------- string: str Input string >>> it = parse_js_objects("{a: 100} {b: 100}") >>> next(it) {'a': 100} >>> next(it) {'b': 100} unicode_escape: bool, optional Attempt to fix input string if it contains escaped special characters >>> next(parse_js_objects('{\\\\"a\\\\": 100}')) {'\\\\"a\\\\"': 100} >>> next(parse_js_objects('{\\\\"a\\\\": 100}', unicode_escape=True)) {'a': 100} omitempty: bool, optional Skip empty dictionaries and lists >>> list(parse_js_objects("{a: 12} {} {b: 13}")) [{'a': 12}, {}, {'b': 13}] >>> list(parse_js_objects("{a: 12} {} {b: 13}", omitempty=True)) [{'a': 12}, {'b': 13}] loader: func, optional Function used to load processed input data. By default `json.loads` is used >>> import orjson >>> import chompjs >>> >>> next(chompjs.parse_js_objects("{'a': 12}", loader=orjson.loads)) {'a': 12} loader_args: list, optional Allow passing down positional arguments to loader function loader_kwargs: dict, optional Allow passing down keyword arguments to loader function >>> next(parse_js_objects("{'a': 10.1}")) {'a': 10.1} >>> import decimal >>> next(parse_js_objects("{'a': 10.1}", loader_kwargs={'parse_float': decimal.Decimal})) {'a': Decimal('10.1')} .. deprecated:: 1.3.0 json_params: dict, optional Use `loader_kwargs` instead Returns ------- generator Iterating over it yields all encountered JSON objects """ if not string: return loader_args, loader_kwargs = _process_loader_arguments( loader_args, loader_kwargs, json_params ) string = _preprocess(string, unicode_escape) for raw_data in parse_objects(string): try: data = loader(raw_data, *loader_args, **loader_kwargs) except ValueError: continue if not data and omitempty: continue yield data