Module chompjs

Expand source code
# -*- coding: utf-8 -*-

import json

from _chompjs import parse, parse_objects


def _preprocess(string, unicode_escape=False):
    if unicode_escape:
        string = string.encode().decode('unicode_escape')
    return string


def parse_js_object(string, unicode_escape=False, json_params=None):
    """
    Extract the first JSON object found in the input string.

    Parameters
    ----------
    string: str
        Input string
    unicode_escape: bool, optional
        Attempt to fix the input string if it contains escaped special
        characters
    json_params: dict, optional
        Keyword arguments passed down to ``json.loads``

    Returns
    -------
    list | dict
        Extracted JSON object

    Raises
    ------
    ValueError
        If the input is empty or could not be parsed properly

    Examples
    --------
    >>> parse_js_object("{a: 100}")
    {'a': 100}

    >>> parse_js_object('{\\\\"a\\\\": 100}')
    {'\\\\"a\\\\"': 100}
    >>> parse_js_object('{\\\\"a\\\\": 100}', unicode_escape=True)
    {'a': 100}

    >>> parse_js_object("{'a': 10.1}")
    {'a': 10.1}
    >>> import decimal
    >>> parse_js_object("{'a': 10.1}", json_params={'parse_float': decimal.Decimal})
    {'a': Decimal('10.1')}

    >>> parse_js_object(None)
    Traceback (most recent call last):
      ...
    ValueError: Invalid input
    >>> parse_js_object("No JSON objects in sight...")
    Traceback (most recent call last):
      ...
    json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

    """
    # None and the empty string are both rejected before any parsing happens.
    if not string:
        raise ValueError('Invalid input')

    loads_kwargs = json_params or {}
    json_text = parse(_preprocess(string, unicode_escape))
    return json.loads(json_text, **loads_kwargs)


def parse_js_objects(string, unicode_escape=False, omitempty=False, json_params=None):
    """
    Lazily extract every JSON object found in the input string.

    The function is a generator, so nothing is parsed until it is iterated.
    Can be used to read JSON Lines.

    Parameters
    ----------
    string: str
        Input string
    unicode_escape: bool, optional
        Attempt to fix the input string if it contains escaped special
        characters
    omitempty: bool, optional
        Skip empty dictionaries and lists
    json_params: dict, optional
        Keyword arguments passed down to ``json.loads``

    Returns
    -------
    generator
        Iterating over it yields all encountered JSON objects

    Examples
    --------
    >>> it = parse_js_objects("{a: 100} {b: 100}")
    >>> next(it)
    {'a': 100}
    >>> next(it)
    {'b': 100}

    >>> next(parse_js_objects('{\\\\"a\\\\": 100}'))
    {'\\\\"a\\\\"': 100}
    >>> next(parse_js_objects('{\\\\"a\\\\": 100}', unicode_escape=True))
    {'a': 100}

    >>> list(parse_js_objects("{a: 12} {} {b: 13}"))
    [{'a': 12}, {}, {'b': 13}]
    >>> list(parse_js_objects("{a: 12} {} {b: 13}", omitempty=True))
    [{'a': 12}, {'b': 13}]

    >>> next(parse_js_objects("{'a': 10.1}"))
    {'a': 10.1}
    >>> import decimal
    >>> next(parse_js_objects("{'a': 10.1}", json_params={'parse_float': decimal.Decimal}))
    {'a': Decimal('10.1')}
    """
    # Empty or missing input produces an empty generator rather than an error.
    if not string:
        return

    loads_kwargs = json_params or {}
    for chunk in parse_objects(_preprocess(string, unicode_escape)):
        try:
            obj = json.loads(chunk, **loads_kwargs)
        except ValueError:
            # Best effort: a chunk that is not valid JSON is skipped silently.
            continue

        # Falsy results are yielded unless the caller asked to omit them.
        if obj or not omitempty:
            yield obj

Functions

def parse_js_object(string, unicode_escape=False, json_params=None)

Extracts the first JSON object encountered in the input string.

Parameters

string : str
Input string
>>> parse_js_object("{a: 100}")
{'a': 100}
unicode_escape : bool, optional
Attempt to fix input string if it contains escaped special characters
>>> parse_js_object('{\\"a\\": 100}')
{'\\"a\\"': 100}
>>> parse_js_object('{\\"a\\": 100}', unicode_escape=True)
{'a': 100}
json_params : dict, optional
Allow passing down standard json.loads options
>>> parse_js_object("{'a': 10.1}")
{'a': 10.1}
>>> import decimal
>>> parse_js_object("{'a': 10.1}", json_params={'parse_float': decimal.Decimal})
{'a': Decimal('10.1')}

Returns

list | dict
Extracted JSON object

Raises

ValueError
If failed to parse input properly
>>> parse_js_object(None)
Traceback (most recent call last):
  ...
ValueError: Invalid input
>>> parse_js_object("No JSON objects in sight...")
Traceback (most recent call last):
  ...
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Expand source code
def parse_js_object(string, unicode_escape=False, json_params=None):
    """
    Extract the first JSON object found in the input string.

    Parameters
    ----------
    string: str
        Input string
    unicode_escape: bool, optional
        Attempt to fix the input string if it contains escaped special
        characters
    json_params: dict, optional
        Keyword arguments passed down to ``json.loads``

    Returns
    -------
    list | dict
        Extracted JSON object

    Raises
    ------
    ValueError
        If the input is empty or could not be parsed properly

    Examples
    --------
    >>> parse_js_object("{a: 100}")
    {'a': 100}

    >>> parse_js_object('{\\\\"a\\\\": 100}')
    {'\\\\"a\\\\"': 100}
    >>> parse_js_object('{\\\\"a\\\\": 100}', unicode_escape=True)
    {'a': 100}

    >>> parse_js_object("{'a': 10.1}")
    {'a': 10.1}
    >>> import decimal
    >>> parse_js_object("{'a': 10.1}", json_params={'parse_float': decimal.Decimal})
    {'a': Decimal('10.1')}

    >>> parse_js_object(None)
    Traceback (most recent call last):
      ...
    ValueError: Invalid input
    >>> parse_js_object("No JSON objects in sight...")
    Traceback (most recent call last):
      ...
    json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

    """
    # None and the empty string are both rejected before any parsing happens.
    if not string:
        raise ValueError('Invalid input')

    loads_kwargs = json_params or {}
    json_text = parse(_preprocess(string, unicode_escape))
    return json.loads(json_text, **loads_kwargs)
def parse_js_objects(string, unicode_escape=False, omitempty=False, json_params=None)

Returns a generator extracting all JSON objects encountered in the input string. Can be used to read JSON Lines.

Parameters

string : str
Input string
>>> it = parse_js_objects("{a: 100} {b: 100}")
>>> next(it)
{'a': 100}
>>> next(it)
{'b': 100}
unicode_escape : bool, optional
Attempt to fix input string if it contains escaped special characters
>>> next(parse_js_objects('{\\"a\\": 100}'))
{'\\"a\\"': 100}
>>> next(parse_js_objects('{\\"a\\": 100}', unicode_escape=True))
{'a': 100}
omitempty : bool, optional
Skip empty dictionaries and lists
>>> list(parse_js_objects("{a: 12} {} {b: 13}"))
[{'a': 12}, {}, {'b': 13}]
>>> list(parse_js_objects("{a: 12} {} {b: 13}", omitempty=True))
[{'a': 12}, {'b': 13}]
json_params : dict, optional
Allow passing down standard json.loads options
>>> next(parse_js_objects("{'a': 10.1}"))
{'a': 10.1}
>>> import decimal
>>> next(parse_js_objects("{'a': 10.1}", json_params={'parse_float': decimal.Decimal}))
{'a': Decimal('10.1')}

Returns

generator
Iterating over it yields all encountered JSON objects
Expand source code
def parse_js_objects(string, unicode_escape=False, omitempty=False, json_params=None):
    """
    Lazily extract every JSON object found in the input string.

    The function is a generator, so nothing is parsed until it is iterated.
    Can be used to read JSON Lines.

    Parameters
    ----------
    string: str
        Input string
    unicode_escape: bool, optional
        Attempt to fix the input string if it contains escaped special
        characters
    omitempty: bool, optional
        Skip empty dictionaries and lists
    json_params: dict, optional
        Keyword arguments passed down to ``json.loads``

    Returns
    -------
    generator
        Iterating over it yields all encountered JSON objects

    Examples
    --------
    >>> it = parse_js_objects("{a: 100} {b: 100}")
    >>> next(it)
    {'a': 100}
    >>> next(it)
    {'b': 100}

    >>> next(parse_js_objects('{\\\\"a\\\\": 100}'))
    {'\\\\"a\\\\"': 100}
    >>> next(parse_js_objects('{\\\\"a\\\\": 100}', unicode_escape=True))
    {'a': 100}

    >>> list(parse_js_objects("{a: 12} {} {b: 13}"))
    [{'a': 12}, {}, {'b': 13}]
    >>> list(parse_js_objects("{a: 12} {} {b: 13}", omitempty=True))
    [{'a': 12}, {'b': 13}]

    >>> next(parse_js_objects("{'a': 10.1}"))
    {'a': 10.1}
    >>> import decimal
    >>> next(parse_js_objects("{'a': 10.1}", json_params={'parse_float': decimal.Decimal}))
    {'a': Decimal('10.1')}
    """
    # Empty or missing input produces an empty generator rather than an error.
    if not string:
        return

    loads_kwargs = json_params or {}
    for chunk in parse_objects(_preprocess(string, unicode_escape)):
        try:
            obj = json.loads(chunk, **loads_kwargs)
        except ValueError:
            # Best effort: a chunk that is not valid JSON is skipped silently.
            continue

        # Falsy results are yielded unless the caller asked to omit them.
        if obj or not omitempty:
            yield obj