diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..a0c19b2 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,5 @@ +[run] +source = javaobj/ + +[report] +include = javaobj/* diff --git a/.coveralls.yml b/.coveralls.yml deleted file mode 100644 index 9160059..0000000 --- a/.coveralls.yml +++ /dev/null @@ -1 +0,0 @@ -service_name: travis-ci diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..b1493ad --- /dev/null +++ b/.editorconfig @@ -0,0 +1,17 @@ +root=true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +indent_style = space +trim_trailing_whitespace = true + +[*.py] +indent_size = 4 + +[*.rst] +indent_size = 3 + +[*.{yml,yaml,toml}] +indent_size = 2 diff --git a/.github/workflows/build-20.04.yml b/.github/workflows/build-20.04.yml new file mode 100644 index 0000000..2d8a2bc --- /dev/null +++ b/.github/workflows/build-20.04.yml @@ -0,0 +1,49 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: CI Build - Python 3.5-3.7 + +on: + push: + branches: [ "master" ] + tags: '**' + pull_request: + branches: [ "master" ] + +jobs: + build: + timeout-minutes: 10 + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python-version: ["3.5", "3.6", "3.7"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + env: + PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest coverage + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test + run: | + coverage run -m pytest + - name: Coveralls + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + run: | + pip install coveralls + coveralls diff --git a/.github/workflows/build-24.04.yml b/.github/workflows/build-24.04.yml new file mode 100644 index 0000000..4c25cf3 --- /dev/null +++ b/.github/workflows/build-24.04.yml @@ -0,0 +1,47 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: CI Build - Python 3.8+ + +on: + push: + branches: [ "master" ] + tags: '**' + pull_request: + branches: [ "master" ] + +jobs: + build: + timeout-minutes: 10 + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14-dev"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest coverage + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test + run: | + coverage run -m pytest + - name: Coveralls + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + run: | + pip install coveralls + coveralls diff --git a/.gitignore b/.gitignore index 5ffbefd..9711698 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.so # Packages +.eggs/ *.egg *.egg-info dist @@ -37,3 +38,11 @@ nosetests.xml .idea/ .vscode/ .*cache/ + +# Log files +*.log + +# Folders and scripts used to reproduce issues +/issue*/ +/repro*.py +/test*.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4d82b77..0000000 --- a/.travis.yml +++ /dev/null @@ -1,18 +0,0 @@ -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - -sudo: false - -install: - - pip install nose coverage coveralls - - pip install pytest>=2.7.3 --upgrade - -script: - - nosetests -v --with-coverage --cover-package=javaobj tests - -after_success: - - coveralls diff --git a/AUTHORS b/AUTHORS index aa70b71..bbc1a99 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,3 +9,7 @@ Many thanks to the contributors: * @voetsjoeba * Vadim Markovtsev (@vmarkovtsev) * Jason Spencer, Google LLC (@j8spencer) +* @guywithface +* Chris van Marle (@qistoph) +* Federico Alves (@UruDev) +* @sarimak diff --git a/README.md b/README.md new file mode 100644 index 0000000..4385a0b --- /dev/null +++ b/README.md @@ -0,0 +1,482 @@ +# javaobj-py3 + +[![Latest Version](https://img.shields.io/pypi/v/javaobj-py3.svg)](https://pypi.python.org/pypi/javaobj-py3/) +[![License](https://img.shields.io/pypi/l/javaobj-py3.svg)](https://pypi.python.org/pypi/javaobj-py3/) +[![CI Build](https://github.com/tcalmant/python-javaobj/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/tcalmant/python-javaobj/actions/workflows/build.yml) +[![Coveralls 
status](https://coveralls.io/repos/tcalmant/python-javaobj/badge.svg?branch=master)](https://coveralls.io/r/tcalmant/python-javaobj?branch=master) + +*python-javaobj* is a python library that provides functions for reading and +writing (writing is WIP currently) Java objects serialized or will be +deserialized by `ObjectOutputStream`. This form of object representation is a +standard data interchange format in Java world. + +The `javaobj` module exposes an API familiar to users of the standard library +`marshal`, `pickle` and `json` modules. + +## About this repository + +This project is a fork of *python-javaobj* by Volodymyr Buell, originally from +[Google Code](http://code.google.com/p/python-javaobj/) and now hosted on +[GitHub](https://github.com/vbuell/python-javaobj). + +This fork intends to work both on Python 2.7 and Python 3.4+. + +## Compatibility Warnings + +### New implementation of the parser + +| Implementations | Version | +|-----------------|----------| +| `v1`, `v2` | `0.4.0+` | + +Since version 0.4.0, two implementations of the parser are available: + +* `v1`: the *classic* implementation of `javaobj`, with a work in progress + implementation of a writer. +* `v2`: the *new* implementation, which is a port of the Java project + [`jdeserialize`](https://github.com/frohoff/jdeserialize/), + with support of the object transformer (with a new API) and of the `numpy` + arrays loading. + +You can use the `v1` parser to ensure that the behaviour of your scripts +doesn't change and to keep the ability to write down files. + +You can use the `v2` parser for new developments +*which won't require marshalling* and as a *fallback* if the `v1` +fails to parse a file. + +### Object transformers V1 + +| Implementations | Version | +|-----------------|----------| +| `v1` | `0.2.0+` | + +As of version 0.2.0, the notion of *object transformer* from the original +project has been replaced by an *object creator*. 
+ +The *object creator* is called before the deserialization. +This allows to store the reference of the converted object before deserializing +it, and avoids a mismatch between the referenced object and the transformed one. + +### Object transformers V2 + +| Implementations | Version | +|-----------------|----------| +| `v2` | `0.4.0+` | + +The `v2` implementation provides a new API for the object transformers. +Please look at the *Usage (V2)* section in this file. + +### Bytes arrays + +| Implementations | Version | +|-----------------|----------| +| `v1` | `0.2.3+` | + +As of version 0.2.3, bytes arrays are loaded as a `bytes` object instead of +an array of integers. + +### Custom Transformer + +| Implementations | Version | +|-----------------|----------| +| `v2` | `0.4.2+` | + +A new transformer API has been proposed to handle objects written with a custom +Java writer. +You can find a sample usage in the *Custom Transformer* section in this file. + +## Features + +* Java object instance un-marshalling +* Java classes un-marshalling +* Primitive values un-marshalling +* Automatic conversion of Java Collections to python ones + (`HashMap` => `dict`, `ArrayList` => `list`, etc.) +* Basic marshalling of simple Java objects (`v1` implementation only) +* Automatically uncompresses GZipped files + +## Requirements + +* Python >= 2.7 or Python >= 3.4 +* `enum34` and `typing` when using Python <= 3.4 (installable with `pip`) +* Maven 2+ (for building test data of serialized objects. 
+ You can skip it if you do not plan to run `tests.py`) + +## Usage (V1 implementation) + +Un-marshalling of Java serialised object: + +```python +import javaobj + +with open("obj5.ser", "rb") as fd: + jobj = fd.read() + +pobj = javaobj.loads(jobj) +print(pobj) +``` + +Or, you can use `JavaObjectUnmarshaller` object directly: + +```python +import javaobj + +with open("objCollections.ser", "rb") as fd: + marshaller = javaobj.JavaObjectUnmarshaller(fd) + pobj = marshaller.readObject() + + print(pobj.value, "should be", 17) + print(pobj.next, "should be", True) + + pobj = marshaller.readObject() +``` + +**Note:** The objects and methods provided by `javaobj` module are shortcuts +to the `javaobj.v1` package, for compatibility purposes. +It is **recommended** to explicitly import methods and classes from the `v1` +(or `v2`) package when writing new code, in order to be sure that your code +won't need import updates in the future. + + +## Usage (V2 implementation) + +The following methods are provided by the `javaobj.v2` package: + +* `load(fd, *transformers, use_numpy_arrays=False)`: + Parses the content of the given file descriptor, opened in binary mode (`rb`). + The method accepts a list of custom object transformers. The default object + transformer is always added to the list. + + The `use_numpy_arrays` flag indicates that the arrays of primitive type + elements must be loaded using `numpy` (if available) instead of using the + standard parsing technique. + +* `loads(bytes, *transformers, use_numpy_arrays=False)`: + This is a shortcut to the `load()` method, providing it the binary data + using a `BytesIO` object. + +**Note:** The V2 parser doesn't have the marshalling capability. + +Sample usage: + +```python +import javaobj.v2 as javaobj + +with open("obj5.ser", "rb") as fd: + pobj = javaobj.load(fd) + +print(pobj.dump()) +``` + +### Object Transformer + +An object transformer can be called during the parsing of a Java object +instance or while loading an array. 
+ +The Java object instance parsing works in two main steps: + +1. The transformer is called to create an instance of a bean that inherits + `JavaInstance`. +1. The latter bean is then called: + + * When the object is written with a custom block data + * After the fields and annotations have been parsed, to update the content + of the Python bean. + +Here is an example for a Java `HashMap` object. You can look at the code of +the `javaobj.v2.transformer` module to see the whole implementation. + +```python +class JavaMap(dict, javaobj.v2.beans.JavaInstance): + """ + Inherits from dict for Python usage, JavaInstance for parsing purpose + """ + def __init__(self): + # Don't forget to call both constructors + dict.__init__(self) + JavaInstance.__init__(self) + + def load_from_blockdata(self, parser, reader, indent=0): + """ + Reads content stored in a block data. + + This method is called only if the class description has both the + `SC_EXTERNALIZABLE` and `SC_BLOCK_DATA` flags set. + + The stream parsing will stop and fail if this method returns False. + + :param parser: The JavaStreamParser in use + :param reader: The underlying data stream reader + :param indent: Indentation to use in logs + :return: True on success, False on error + """ + # This kind of class is not supposed to have the SC_BLOCK_DATA flag set + return False + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from the parsed instance object. + + This method is called after the block data (if any), the fields and + the annotations have been loaded. 
+ + :param indent: Indentation to use while logging + :return: True on success (currently ignored) + """ + # Maps have their content in their annotations + for cd, annotations in self.annotations.items(): + # Annotations are associated to their definition class + if cd.name == "java.util.HashMap": + # We are in the annotation created by the handled class + # Group annotation elements 2 by 2 + # (storage is: key, value, key, value, ...) + args = [iter(annotations[1:])] * 2 + for key, value in zip(*args): + self[key] = value + + # Job done + return True + + # Couldn't load the data + return False + +class MapObjectTransformer(javaobj.v2.api.ObjectTransformer): + """ + Creates a JavaInstance object with custom loading methods for the + classes it can handle + """ + def create_instance(self, classdesc): + # type: (JavaClassDesc) -> Optional[JavaInstance] + """ + Transforms a parsed Java object into a Python object + + :param classdesc: The description of a Java class + :return: The Python form of the object, or the original JavaObject + """ + if classdesc.name == "java.util.HashMap": + # We can handle this class description + return JavaMap() + else: + # Return None if the class is not handled + return None +``` + +### Custom Object Transformer + +The custom transformer is called when the class is not handled by the default +object transformer. +A custom object transformer still inherits from the `ObjectTransformer` class, +but it also implements the `load_custom_writeObject` method. + +The sample given here is used in the unit tests. 
+ +#### Java sample + +On the Java side, we create various classes and write them as we wish: + +```java +class CustomClass implements Serializable { + + private static final long serialVersionUID = 1; + + public void start(ObjectOutputStream out) throws Exception { + this.writeObject(out); + } + + private void writeObject(ObjectOutputStream out) throws IOException { + CustomWriter custom = new CustomWriter(42); + out.writeObject(custom); + out.flush(); + } +} + +class RandomChild extends Random { + + private static final long serialVersionUID = 1; + private int num = 1; + private double doub = 4.5; + + RandomChild(int seed) { + super(seed); + } +} + +class CustomWriter implements Serializable { + protected RandomChild custom_obj; + + CustomWriter(int seed) { + custom_obj = new RandomChild(seed); + } + + private static final long serialVersionUID = 1; + private static final int CURRENT_SERIAL_VERSION = 0; + + private void writeObject(ObjectOutputStream out) throws IOException { + out.writeInt(CURRENT_SERIAL_VERSION); + out.writeObject(custom_obj); + } +} +``` + +And here is a sample writing of that kind of object: + +```java +ObjectOutputStream oos = new ObjectOutputStream( + new FileOutputStream("custom_objects.ser")); +CustomClass writer = new CustomClass(); +writer.start(oos); +oos.flush(); +oos.close(); +``` + +#### Python sample + +On the Python side, the first step is to define the custom transformers. +They are children of the `javaobj.v2.transformers.ObjectTransformer` class. 
+ +```python +class BaseTransformer(javaobj.v2.transformers.ObjectTransformer): + """ + Creates a JavaInstance object with custom loading methods for the + classes it can handle + """ + + def __init__(self, handled_classes=None): + self.instance = None + self.handled_classes = handled_classes or {} + + def create_instance(self, classdesc): + """ + Transforms a parsed Java object into a Python object + + :param classdesc: The description of a Java class + :return: The Python form of the object, or the original JavaObject + """ + if classdesc.name in self.handled_classes: + self.instance = self.handled_classes[classdesc.name]() + return self.instance + + return None + +class RandomChildTransformer(BaseTransformer): + def __init__(self): + super(RandomChildTransformer, self).__init__( + {"RandomChild": RandomChildInstance} + ) + +class CustomWriterTransformer(BaseTransformer): + def __init__(self): + super(CustomWriterTransformer, self).__init__( + {"CustomWriter": CustomWriterInstance} + ) + +class JavaRandomTransformer(BaseTransformer): + def __init__(self): + super(JavaRandomTransformer, self).__init__() + self.name = "java.util.Random" + self.field_names = ["haveNextNextGaussian", "nextNextGaussian", "seed"] + self.field_types = [ + javaobj.v2.beans.FieldType.BOOLEAN, + javaobj.v2.beans.FieldType.DOUBLE, + javaobj.v2.beans.FieldType.LONG, + ] + + def load_custom_writeObject(self, parser, reader, name): + if name != self.name: + return None + + fields = [] + values = [] + for f_name, f_type in zip(self.field_names, self.field_types): + values.append(parser._read_field_value(f_type)) + fields.append(javaobj.beans.JavaField(f_type, f_name)) + + class_desc = javaobj.beans.JavaClassDesc( + javaobj.beans.ClassDescType.NORMALCLASS + ) + class_desc.name = self.name + class_desc.desc_flags = javaobj.beans.ClassDataType.EXTERNAL_CONTENTS + class_desc.fields = fields + class_desc.field_data = values + return class_desc +``` + +Second step is defining the representation of 
the instances, where the real +object loading occurs. Those classes inherit from +`javaobj.v2.beans.JavaInstance`. + +```python +class CustomWriterInstance(javaobj.v2.beans.JavaInstance): + def __init__(self): + javaobj.v2.beans.JavaInstance.__init__(self) + + def load_from_instance(self): + """ + Updates the content of this instance + from its parsed fields and annotations + :return: True on success, False on error + """ + if self.classdesc and self.classdesc in self.annotations: + # Here, we know there is something written before the fields, + # even if it's not declared in the class description + fields = ["int_not_in_fields"] + self.classdesc.fields_names + raw_data = self.annotations[self.classdesc] + int_not_in_fields = struct.unpack( + ">i", BytesIO(raw_data[0].data).read(4) + )[0] + custom_obj = raw_data[1] + values = [int_not_in_fields, custom_obj] + self.field_data = dict(zip(fields, values)) + return True + + return False + + +class RandomChildInstance(javaobj.v2.beans.JavaInstance): + def load_from_instance(self): + """ + Updates the content of this instance + from its parsed fields and annotations + :return: True on success, False on error + """ + if self.classdesc and self.classdesc in self.field_data: + fields = self.classdesc.fields_names + values = [ + self.field_data[self.classdesc][self.classdesc.fields[i]] + for i in range(len(fields)) + ] + self.field_data = dict(zip(fields, values)) + if ( + self.classdesc.super_class + and self.classdesc.super_class in self.annotations + ): + super_class = self.annotations[self.classdesc.super_class][0] + self.annotations = dict( + zip(super_class.fields_names, super_class.field_data) + ) + return True + + return False +``` + +Finally we can use the transformers in the loading process. +Note that even if it is not explicitly given, the `DefaultObjectTransformer` +will also be used, as it is added automatically by `javaobj` if it is +missing from the given list. 
+ +```python +# Load the object using those transformers +transformers = [ + CustomWriterTransformer(), + RandomChildTransformer(), + JavaRandomTransformer() +] +pobj = javaobj.loads("custom_objects.ser", *transformers) + +# Here we show a field that isn't visible from the class description +# The field belongs to the class but it's not serialized by default because +# it's static. See: https://stackoverflow.com/a/16477421/12621168 +print(pobj.field_data["int_not_in_fields"]) +``` diff --git a/README.rst b/README.rst deleted file mode 100644 index 5e2a936..0000000 --- a/README.rst +++ /dev/null @@ -1,97 +0,0 @@ -javaobj-py3 -########### - -.. image:: https://img.shields.io/pypi/v/javaobj-py3.svg - :target: https://pypi.python.org/pypi/javaobj-py3/ - :alt: Latest Version - -.. image:: https://img.shields.io/pypi/l/javaobj-py3.svg - :target: https://pypi.python.org/pypi/javaobj-py3/ - :alt: License - -.. image:: https://travis-ci.org/tcalmant/python-javaobj.svg?branch=master - :target: https://travis-ci.org/tcalmant/python-javaobj - :alt: Travis-CI status - -.. image:: https://coveralls.io/repos/tcalmant/python-javaobj/badge.svg?branch=master - :target: https://coveralls.io/r/tcalmant/python-javaobj?branch=master - :alt: Coveralls status - -*python-javaobj* is a python library that provides functions for reading and -writing (writing is WIP currently) Java objects serialized or will be -deserialized by ``ObjectOutputStream``. This form of object representation is a -standard data interchange format in Java world. - -The ``javaobj`` module exposes an API familiar to users of the standard library -``marshal``, ``pickle`` and ``json`` modules. - -About this repository -===================== - -This project is a fork of *python-javaobj* by Volodymyr Buell, originally from -`Google Code `_ and now hosted on -`GitHub `_. - -This fork intends to work both on Python 2.7 and Python 3.4+. 
- -Compatibility Warning: object transformer ------------------------------------------ - -As of version 0.2.0, the notion of *object transformer* from the original -project as been replaced by an *object creator*. - -The *object creator* is called before the deserialization. -This allows to store the reference of the converted object before deserializing -it, and avoids a mismatch between the referenced object and the transformed one. - - -Compatibility Warning: bytes arrays ------------------------------------ - -As of version 0.2.3, bytes arrays are loaded as a ``bytes`` object instead of -an array of integers. - - -Features -======== - -* Java object instance unmarshaling -* Java classes unmarshaling -* Primitive values unmarshaling -* Automatic conversion of Java Collections to python ones - (``HashMap`` => ``dict``, ``ArrayList`` => ``list``, etc.) -* Basic marshalling of simple Java objects - -Requirements -============ - -* Python >= 2.7 or Python >= 3.4 -* Maven 2+ (for building test data of serialized objects. - You can skip it if you do not plan to run ``tests.py``) - -Usage -===== - -Unmarshalling of Java serialised object: - -.. code-block:: python - - import javaobj - - jobj = self.read_file("obj5.ser") - pobj = javaobj.loads(jobj) - print(pobj) - -Or, you can use Unmarshaller object directly: - -.. code-block:: python - - import javaobj - - marshaller = javaobj.JavaObjectUnmarshaller(open("objCollections.ser")) - pobj = marshaller.readObject() - - self.assertEqual(pobj.value, 17) - self.assertTrue(pobj.next) - - pobj = marshaller.readObject() diff --git a/javaobj.py b/javaobj.py deleted file mode 100644 index 2311ad3..0000000 --- a/javaobj.py +++ /dev/null @@ -1,1681 +0,0 @@ -#!/usr/bin/python -# -- Content-Encoding: UTF-8 -- -""" -Provides functions for reading and writing (writing is WIP currently) Java -objects serialized or will be deserialized by ObjectOutputStream. 
This form of -object representation is a standard data interchange format in Java world. - -javaobj module exposes an API familiar to users of the standard library -marshal, pickle and json modules. - -See: -http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html - -:authors: Volodymyr Buell, Thomas Calmant -:license: Apache License 2.0 -:version: 0.2.3 -:status: Alpha - -.. - - Copyright 2016 Thomas Calmant - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -# Standard library -import collections -import logging -import os -import struct -import sys - -try: - # Python 2 - from StringIO import StringIO as BytesIO -except ImportError: - # Python 3+ - from io import BytesIO - -# ------------------------------------------------------------------------------ - -# Module version -__version_info__ = (0, 2, 3) -__version__ = ".".join(str(x) for x in __version_info__) - -# Documentation strings format -__docformat__ = "restructuredtext en" - -# ------------------------------------------------------------------------------ - -# Setup the logger -_log = logging.getLogger(__name__) - - -def log_debug(message, ident=0): - """ - Logs a message at debug level - - :param message: Message to log - :param ident: Number of indentation spaces - """ - _log.debug(" " * (ident * 2) + str(message)) - - -def log_error(message, ident=0): - """ - Logs a message at error level - - :param message: Message to log - :param ident: Number of indentation spaces - """ - _log.error(" 
" * (ident * 2) + str(message)) - -# ------------------------------------------------------------------------------ - -if sys.version_info[0] >= 3: - # Python 3 interpreter : bytes & str - def to_bytes(data, encoding="UTF-8"): - """ - Converts the given string to an array of bytes. - Returns the first parameter if it is already an array of bytes. - - :param data: A unicode string - :param encoding: The encoding of data - :return: The corresponding array of bytes - """ - if type(data) is bytes: - # Nothing to do - return data - return data.encode(encoding) - - def to_str(data, encoding="UTF-8"): - """ - Converts the given parameter to a string. - Returns the first parameter if it is already an instance of ``str``. - - :param data: A string - :param encoding: The encoding of data - :return: The corresponding string - """ - if type(data) is str: - # Nothing to do - return data - return str(data, encoding) - - def read_to_str(data): - """ - Concats all bytes into a string - """ - return ''.join(chr(char) for char in data) - -else: - # Python 2 interpreter : str & unicode - def to_str(data, encoding="UTF-8"): - """ - Converts the given parameter to a string. - Returns the first parameter if it is already an instance of ``str``. - - :param data: A string - :param encoding: The encoding of data - :return: The corresponding string - """ - if type(data) is str: - # Nothing to do - return data - return data.encode(encoding) - - # Same operation - to_bytes = to_str - - def read_to_str(data): - """ - Nothing to do in Python 2 - """ - return data - -# ------------------------------------------------------------------------------ - - -def load(file_object, *transformers, **kwargs): - """ - Deserializes Java primitive data and objects serialized using - ObjectOutputStream from a file-like object. 
- - :param file_object: A file-like object - :param transformers: Custom transformers to use - :param ignore_remaining_data: If True, don't log an error when unused - trailing bytes are remaining - :return: The deserialized object - """ - # Read keyword argument - ignore_remaining_data = kwargs.get('ignore_remaining_data', False) - - marshaller = JavaObjectUnmarshaller( - file_object, kwargs.get('use_numpy_arrays', False)) - - # Add custom transformers first - for transformer in transformers: - marshaller.add_transformer(transformer) - marshaller.add_transformer(DefaultObjectTransformer()) - - # Read the file object - return marshaller.readObject(ignore_remaining_data=ignore_remaining_data) - - -def loads(string, *transformers, **kwargs): - """ - Deserializes Java objects and primitive data serialized using - ObjectOutputStream from a string. - - :param string: A Java data string - :param transformers: Custom transformers to use - :param ignore_remaining_data: If True, don't log an error when unused - trailing bytes are remaining - :return: The deserialized object - """ - # Read keyword argument - ignore_remaining_data = kwargs.get('ignore_remaining_data', False) - - # Reuse the load method (avoid code duplication) - return load(BytesIO(string), *transformers, - ignore_remaining_data=ignore_remaining_data) - - -def dumps(obj, *transformers): - """ - Serializes Java primitive data and objects unmarshaled by load(s) before - into string. 
- - :param obj: A Python primitive object, or one loaded using load(s) - :param transformers: Custom transformers to use - :return: The serialized data as a string - """ - marshaller = JavaObjectMarshaller() - # Add custom transformers - for transformer in transformers: - marshaller.add_transformer(transformer) - - return marshaller.dump(obj) - -# ------------------------------------------------------------------------------ - - -class JavaClass(object): - """ - Represents a class in the Java world - """ - def __init__(self): - """ - Sets up members - """ - self.name = None - self.serialVersionUID = None - self.flags = None - self.fields_names = [] - self.fields_types = [] - self.superclass = None - - def __str__(self): - """ - String representation of the Java class - """ - return self.__repr__() - - def __repr__(self): - """ - String representation of the Java class - """ - return "[{0:s}:0x{1:X}]".format(self.name, self.serialVersionUID) - - def __eq__(self, other): - """ - Equality test between two Java classes - - :param other: Other JavaClass to test - :return: True if both classes share the same fields and name - """ - if not isinstance(other, type(self)): - return False - - return (self.name == other.name and - self.serialVersionUID == other.serialVersionUID and - self.flags == other.flags and - self.fields_names == other.fields_names and - self.fields_types == other.fields_types and - self.superclass == other.superclass) - - -class JavaObject(object): - """ - Represents a deserialized non-primitive Java object - """ - def __init__(self): - """ - Sets up members - """ - self.classdesc = None - self.annotations = [] - - def get_class(self): - """ - Returns the JavaClass that defines the type of this object - """ - return self.classdesc - - def __str__(self): - """ - String representation - """ - return self.__repr__() - - def __repr__(self): - """ - String representation - """ - name = "UNKNOWN" - if self.classdesc: - name = self.classdesc.name - return 
"".format(name) - - def __eq__(self, other): - """ - Equality test between two Java classes - - :param other: Other JavaClass to test - :return: True if both classes share the same fields and name - """ - if not isinstance(other, type(self)): - return False - - res = (self.classdesc == other.classdesc and - self.annotations == other.annotations) - if not res: - return False - - for name in self.classdesc.fields_names: - if not getattr(self, name) == getattr(other, name): - return False - return True - - -class JavaString(str): - """ - Represents a Java String - """ - def __hash__(self): - return str.__hash__(self) - - def __eq__(self, other): - if not isinstance(other, str): - return False - return str.__eq__(self, other) - - -class JavaEnum(JavaObject): - """ - Represents a Java enumeration - """ - def __init__(self, constant=None): - super(JavaEnum, self).__init__() - self.constant = constant - - -class JavaArray(list, JavaObject): - """ - Represents a Java Array - """ - def __init__(self, classdesc=None): - list.__init__(self) - JavaObject.__init__(self) - self.classdesc = classdesc - - -class JavaByteArray(JavaObject): - """ - Represents the special case of Java Array which contains bytes - """ - def __init__(self, data, classdesc=None): - JavaObject.__init__(self) - self._data = struct.unpack("b" * len(data), data) - self.classdesc = classdesc - - def __str__(self): - return "JavaByteArray({0})".format(self._data) - - def __getitem__(self, item): - return self._data[item] - - def __iter__(self): - return iter(self._data) - - def __len__(self): - return len(self._data) - -# ------------------------------------------------------------------------------ - - -class JavaObjectConstants(object): - """ - Defines the constants of the Java serialization format - """ - STREAM_MAGIC = 0xaced - STREAM_VERSION = 0x05 - - TC_NULL = 0x70 - TC_REFERENCE = 0x71 - TC_CLASSDESC = 0x72 - TC_OBJECT = 0x73 - TC_STRING = 0x74 - TC_ARRAY = 0x75 - TC_CLASS = 0x76 - TC_BLOCKDATA = 0x77 
- TC_ENDBLOCKDATA = 0x78 - TC_RESET = 0x79 - TC_BLOCKDATALONG = 0x7A - TC_EXCEPTION = 0x7B - TC_LONGSTRING = 0x7C - TC_PROXYCLASSDESC = 0x7D - TC_ENUM = 0x7E - # Ignore TC_MAX: we don't use it and it messes with TC_ENUM - # TC_MAX = 0x7E - - # classDescFlags - SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE - SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE - SC_SERIALIZABLE = 0x02 - SC_EXTERNALIZABLE = 0x04 - SC_ENUM = 0x10 - - # type definition chars (typecode) - TYPE_BYTE = 'B' # 0x42 - TYPE_CHAR = 'C' # 0x43 - TYPE_DOUBLE = 'D' # 0x44 - TYPE_FLOAT = 'F' # 0x46 - TYPE_INTEGER = 'I' # 0x49 - TYPE_LONG = 'J' # 0x4A - TYPE_SHORT = 'S' # 0x53 - TYPE_BOOLEAN = 'Z' # 0x5A - TYPE_OBJECT = 'L' # 0x4C - TYPE_ARRAY = '[' # 0x5B - - # list of supported typecodes listed above - TYPECODES_LIST = [ - # primitive types - TYPE_BYTE, - TYPE_CHAR, - TYPE_DOUBLE, - TYPE_FLOAT, - TYPE_INTEGER, - TYPE_LONG, - TYPE_SHORT, - TYPE_BOOLEAN, - # object types - TYPE_OBJECT, - TYPE_ARRAY] - - BASE_REFERENCE_IDX = 0x7E0000 - - NUMPY_TYPE_MAP = { - TYPE_BYTE: 'B', - TYPE_CHAR: 'b', - TYPE_DOUBLE: '>d', - TYPE_FLOAT: '>f', - TYPE_INTEGER: '>i', - TYPE_LONG: '>l', - TYPE_SHORT: '>h', - TYPE_BOOLEAN: '>B' - } - - -class OpCodeDebug(object): - """ - OP Codes definition and utility methods - """ - # Type codes - OP_CODE = dict((getattr(JavaObjectConstants, key), key) - for key in dir(JavaObjectConstants) - if key.startswith("TC_")) - - TYPE = dict((getattr(JavaObjectConstants, key), key) - for key in dir(JavaObjectConstants) - if key.startswith("TYPE_")) - - STREAM_CONSTANT = dict((getattr(JavaObjectConstants, key), key) - for key in dir(JavaObjectConstants) - if key.startswith("SC_")) - - @staticmethod - def op_id(op_id): - """ - Returns the name of the given OP Code - :param op_id: OP Code - :return: Name of the OP Code - """ - return OpCodeDebug.OP_CODE.get( - op_id, "".format(op_id)) - - @staticmethod - def type_code(type_id): - """ - Returns the name of the given Type Code - :param type_id: Type code 
- :return: Name of the type code - """ - return OpCodeDebug.TYPE.get( - type_id, "".format(type_id)) - - @staticmethod - def flags(flags): - """ - Returns the names of the class description flags found in the given - integer - - :param flags: A class description flag entry - :return: The flags names as a single string - """ - names = sorted( - descr for key, descr in OpCodeDebug.STREAM_CONSTANT.items() - if key & flags) - return ', '.join(names) - -# ------------------------------------------------------------------------------ - - -class JavaObjectUnmarshaller(JavaObjectConstants): - """ - Deserializes a Java serialization stream - """ - def __init__(self, stream, use_numpy_arrays=False): - """ - Sets up members - - :param stream: An input stream (opened in binary/bytes mode) - :raise IOError: Invalid input stream - """ - self.use_numpy_arrays = use_numpy_arrays - - # Check stream - if stream is None: - raise IOError("No input stream given") - - # Prepare the association Terminal Symbol -> Reading method - self.opmap = { - self.TC_NULL: self.do_null, - self.TC_CLASSDESC: self.do_classdesc, - self.TC_OBJECT: self.do_object, - self.TC_STRING: self.do_string, - self.TC_LONGSTRING: self.do_string_long, - self.TC_ARRAY: self.do_array, - self.TC_CLASS: self.do_class, - self.TC_BLOCKDATA: self.do_blockdata, - self.TC_BLOCKDATALONG: self.do_blockdata_long, - self.TC_REFERENCE: self.do_reference, - self.TC_ENUM: self.do_enum, - # note that we are reusing do_null: - self.TC_ENDBLOCKDATA: self.do_null, - } - - # Set up members - self.current_object = None - self.reference_counter = 0 - self.references = [] - self.object_transformers = [] - self.object_stream = stream - - # Read the stream header (magic & version) - self._readStreamHeader() - - def readObject(self, ignore_remaining_data=False): - """ - Reads an object from the input stream - - :param ignore_remaining_data: If True, don't log an error when - unused trailing bytes are remaining - :return: The unmarshalled 
object - :raise Exception: Any exception that occurred during unmarshalling - """ - try: - # TODO: add expects - _, res = self._read_and_exec_opcode(ident=0) - - position_bak = self.object_stream.tell() - the_rest = self.object_stream.read() - if not ignore_remaining_data and len(the_rest): - log_error("Warning!!!!: Stream still has {0} bytes left. " - "Enable debug mode of logging to see the hexdump." - .format(len(the_rest))) - log_debug("\n{0}".format(self._create_hexdump(the_rest))) - else: - log_debug("Java Object unmarshalled successfully!") - - self.object_stream.seek(position_bak) - return res - except Exception: - self._oops_dump_state(ignore_remaining_data) - raise - - def add_transformer(self, transformer): - """ - Appends an object transformer to the deserialization process - - :param transformer: An object with a transform(obj) method - """ - self.object_transformers.append(transformer) - - def _readStreamHeader(self): - """ - Reads the magic header of a Java serialization stream - - :raise IOError: Invalid magic header (not a Java stream) - """ - (magic, version) = self._readStruct(">HH") - if magic != self.STREAM_MAGIC or version != self.STREAM_VERSION: - raise IOError("The stream is not java serialized object. 
" - "Invalid stream header: {0:04X}{1:04X}" - .format(magic, version)) - - def _read_and_exec_opcode(self, ident=0, expect=None): - """ - Reads the next opcode, and executes its handler - - :param ident: Log identation level - :param expect: A list of expected opcodes - :return: A tuple: (opcode, result of the handler) - :raise IOError: Read opcode is not one of the expected ones - :raise RuntimeError: Unknown opcode - """ - position = self.object_stream.tell() - (opid,) = self._readStruct(">B") - log_debug("OpCode: 0x{0:X} -- {1} (at offset 0x{2:X})" - .format(opid, OpCodeDebug.op_id(opid), position), ident) - - if expect and opid not in expect: - raise IOError( - "Unexpected opcode 0x{0:X} -- {1} (at offset 0x{2:X})" - .format(opid, OpCodeDebug.op_id(opid), position)) - - try: - handler = self.opmap[opid] - except KeyError: - raise RuntimeError( - "Unknown OpCode in the stream: 0x{0:X} (at offset 0x{1:X})" - .format(opid, position)) - else: - return opid, handler(ident=ident) - - def _readStruct(self, unpack): - """ - Reads from the input stream, using struct - - :param unpack: An unpack format string - :return: The result of struct.unpack (tuple) - :raise RuntimeError: End of stream reached during unpacking - """ - length = struct.calcsize(unpack) - ba = self.object_stream.read(length) - - if len(ba) != length: - raise RuntimeError("Stream has been ended unexpectedly while " - "unmarshaling.") - - return struct.unpack(unpack, ba) - - def _readString(self, length_fmt="H"): - """ - Reads a serialized string - - :param length_fmt: Structure format of the string length (H or Q) - :return: The deserialized string - :raise RuntimeError: Unexpected end of stream - """ - (length,) = self._readStruct(">{0}".format(length_fmt)) - ba = self.object_stream.read(length) - return to_str(ba) - - def do_classdesc(self, parent=None, ident=0): - """ - Handles a TC_CLASSDESC opcode - - :param parent: - :param ident: Log indentation level - :return: A JavaClass object - """ - # 
TC_CLASSDESC className serialVersionUID newHandle classDescInfo - # classDescInfo: - # classDescFlags fields classAnnotation superClassDesc - # classDescFlags: - # (byte) // Defined in Terminal Symbols and Constants - # fields: - # (short) fieldDesc[count] - - # fieldDesc: - # primitiveDesc - # objectDesc - # primitiveDesc: - # prim_typecode fieldName - # objectDesc: - # obj_typecode fieldName className1 - clazz = JavaClass() - log_debug("[classdesc]", ident) - class_name = self._readString() - clazz.name = class_name - log_debug("Class name: %s" % class_name, ident) - - # serialVersionUID is a Java (signed) long => 8 bytes - serialVersionUID, classDescFlags = self._readStruct(">qB") - clazz.serialVersionUID = serialVersionUID - clazz.flags = classDescFlags - - self._add_reference(clazz, ident) - - log_debug("Serial: 0x{0:X} / {0:d} - classDescFlags: 0x{1:X} {2}" - .format(serialVersionUID, classDescFlags, - OpCodeDebug.flags(classDescFlags)), ident) - (length,) = self._readStruct(">H") - log_debug("Fields num: 0x{0:X}".format(length), ident) - - clazz.fields_names = [] - clazz.fields_types = [] - for fieldId in range(length): - (typecode,) = self._readStruct(">B") - field_name = self._readString() - field_type = self._convert_char_to_type(typecode) - - log_debug("> Reading field {0}".format(field_name), ident) - - if field_type == self.TYPE_ARRAY: - _, field_type = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_STRING, self.TC_REFERENCE)) - - if type(field_type) is not JavaString: - raise AssertionError("Field type must be a JavaString, " - "not {0}".format(type(field_type))) - - elif field_type == self.TYPE_OBJECT: - _, field_type = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_STRING, self.TC_REFERENCE)) - - if type(field_type) is JavaClass: - # FIXME: ugly trick - field_type = JavaString(field_type.name) - - if type(field_type) is not JavaString: - raise AssertionError("Field type must be a JavaString, " - "not 
{0}".format(type(field_type))) - - log_debug("< FieldName: 0x{0:X} Name:{1} Type:{2} ID:{3}" - .format(typecode, field_name, field_type, fieldId), - ident) - assert field_name is not None - assert field_type is not None - - clazz.fields_names.append(field_name) - clazz.fields_types.append(field_type) - - if parent: - parent.__fields = clazz.fields_names - parent.__types = clazz.fields_types - - # classAnnotation - (opid,) = self._readStruct(">B") - log_debug("OpCode: 0x{0:X} -- {1} (classAnnotation)" - .format(opid, OpCodeDebug.op_id(opid)), ident) - if opid != self.TC_ENDBLOCKDATA: - raise NotImplementedError("classAnnotation isn't implemented yet") - - # superClassDesc - log_debug("Reading Super Class of {0}".format(clazz.name), ident) - _, superclassdesc = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_CLASSDESC, self.TC_NULL, self.TC_REFERENCE)) - log_debug("Super Class for {0}: {1}" - .format(clazz.name, str(superclassdesc)), ident) - clazz.superclass = superclassdesc - return clazz - - def do_blockdata(self, parent=None, ident=0): - """ - Handles TC_BLOCKDATA opcode - - :param parent: - :param ident: Log indentation level - :return: A string containing the block data - """ - # TC_BLOCKDATA (unsigned byte) (byte)[size] - log_debug("[blockdata]", ident) - (length,) = self._readStruct(">B") - ba = self.object_stream.read(length) - - # Ensure we have an str - return read_to_str(ba) - - def do_blockdata_long(self, parent=None, ident=0): - """ - Handles TC_BLOCKDATALONG opcode - - :param parent: - :param ident: Log indentation level - :return: A string containing the block data - """ - # TC_BLOCKDATALONG (int) (byte)[size] - log_debug("[blockdatalong]", ident) - (length,) = self._readStruct(">I") - ba = self.object_stream.read(length) - - # Ensure we have an str - return read_to_str(ba) - - def do_class(self, parent=None, ident=0): - """ - Handles TC_CLASS opcode - - :param parent: - :param ident: Log indentation level - :return: A JavaClass 
object - """ - # TC_CLASS classDesc newHandle - log_debug("[class]", ident) - - # TODO: what to do with "(ClassDesc)prevObject". - # (see 3rd line for classDesc:) - _, classdesc = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC, - self.TC_NULL, self.TC_REFERENCE)) - log_debug("Classdesc: {0}".format(classdesc), ident) - self._add_reference(classdesc, ident) - return classdesc - - def do_object(self, parent=None, ident=0): - """ - Handles a TC_OBJECT opcode - - :param parent: - :param ident: Log indentation level - :return: A JavaClass object - """ - # TC_OBJECT classDesc newHandle classdata[] // data for each class - java_object = JavaObject() - log_debug("[object]", ident) - log_debug("java_object.annotations just after instantiation: {0}" - .format(java_object.annotations), ident) - - # TODO: what to do with "(ClassDesc)prevObject". - # (see 3rd line for classDesc:) - opcode, classdesc = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC, - self.TC_NULL, self.TC_REFERENCE)) - # self.TC_REFERENCE hasn't shown in spec, but actually is here - - # Create object - for transformer in self.object_transformers: - java_object = transformer.create(classdesc, self) - if java_object is not None: - break - - # Store classdesc of this object - java_object.classdesc = classdesc - - # Store the reference - self._add_reference(java_object, ident) - - # classdata[] - - if classdesc.flags & self.SC_EXTERNALIZABLE \ - and not classdesc.flags & self.SC_BLOCK_DATA: - # TODO: - raise NotImplementedError("externalContents isn't implemented yet") - - if classdesc.flags & self.SC_SERIALIZABLE: - # TODO: look at ObjectInputStream.readSerialData() - # FIXME: Handle the SC_WRITE_METHOD flag - - # create megalist - tempclass = classdesc - megalist = [] - megatypes = [] - log_debug("Constructing class...", ident) - while tempclass: - log_debug("Class: {0}".format(tempclass.name), ident + 1) - 
class_fields_str = ' - '.join( - ' '.join((field_type, field_name)) - for field_type, field_name - in zip(tempclass.fields_types, tempclass.fields_names)) - if class_fields_str: - log_debug(class_fields_str, ident + 2) - - fieldscopy = tempclass.fields_names[:] - fieldscopy.extend(megalist) - megalist = fieldscopy - - fieldscopy = tempclass.fields_types[:] - fieldscopy.extend(megatypes) - megatypes = fieldscopy - - tempclass = tempclass.superclass - - log_debug("Values count: {0}".format(len(megalist)), ident) - log_debug("Prepared list of values: {0}".format(megalist), ident) - log_debug("Prepared list of types: {0}".format(megatypes), ident) - - for field_name, field_type in zip(megalist, megatypes): - log_debug("Reading field: {0} - {1}" - .format(field_type, field_name)) - res = self._read_value(field_type, ident, name=field_name) - java_object.__setattr__(field_name, res) - - if classdesc.flags & self.SC_SERIALIZABLE \ - and classdesc.flags & self.SC_WRITE_METHOD \ - or classdesc.flags & self.SC_EXTERNALIZABLE \ - and classdesc.flags & self.SC_BLOCK_DATA: - # objectAnnotation - log_debug("java_object.annotations before: {0}" - .format(java_object.annotations), ident) - - while opcode != self.TC_ENDBLOCKDATA: - opcode, obj = self._read_and_exec_opcode(ident=ident + 1) - # , expect=[self.TC_ENDBLOCKDATA, self.TC_BLOCKDATA, - # self.TC_OBJECT, self.TC_NULL, self.TC_REFERENCE]) - if opcode != self.TC_ENDBLOCKDATA: - java_object.annotations.append(obj) - - log_debug("objectAnnotation value: {0}".format(obj), ident) - - log_debug("java_object.annotations after: {0}" - .format(java_object.annotations), ident) - - # Allow extra loading operations - if hasattr(java_object, "__extra_loading__"): - log_debug("Java object has extra loading capability.") - java_object.__extra_loading__(self, ident) - - log_debug(">>> java_object: {0}".format(java_object), ident) - return java_object - - def do_string(self, parent=None, ident=0): - """ - Handles a TC_STRING opcode - - 
:param parent: - :param ident: Log indentation level - :return: A string - """ - log_debug("[string]", ident) - ba = JavaString(self._readString()) - self._add_reference(ba, ident) - return ba - - def do_string_long(self, parent=None, ident=0): - """ - Handles a TC_LONGSTRING opcode - - :param parent: - :param ident: Log indentation level - :return: A string - """ - log_debug("[long string]", ident) - ba = JavaString(self._readString("Q")) - self._add_reference(ba, ident) - return ba - - def do_array(self, parent=None, ident=0): - """ - Handles a TC_ARRAY opcode - - :param parent: - :param ident: Log indentation level - :return: A list of deserialized objects - """ - # TC_ARRAY classDesc newHandle (int) values[size] - log_debug("[array]", ident) - _, classdesc = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC, - self.TC_NULL, self.TC_REFERENCE)) - - array = JavaArray(classdesc) - - self._add_reference(array, ident) - - (size,) = self._readStruct(">i") - log_debug("size: {0}".format(size), ident) - - type_char = classdesc.name[0] - assert type_char == self.TYPE_ARRAY - type_char = classdesc.name[1] - - if type_char == self.TYPE_OBJECT or type_char == self.TYPE_ARRAY: - for _ in range(size): - _, res = self._read_and_exec_opcode(ident=ident + 1) - log_debug("Object value: {0}".format(res), ident) - array.append(res) - elif type_char == self.TYPE_BYTE: - array = JavaByteArray(self.object_stream.read(size), classdesc) - elif self.use_numpy_arrays: - import numpy - array = numpy.fromfile( - self.object_stream, - dtype=JavaObjectConstants.NUMPY_TYPE_MAP[type_char], - count=size) - else: - for _ in range(size): - res = self._read_value(type_char, ident) - log_debug("Native value: {0}".format(res), ident) - array.append(res) - - return array - - def do_reference(self, parent=None, ident=0): - """ - Handles a TC_REFERENCE opcode - - :param parent: - :param ident: Log indentation level - :return: The referenced object - """ 
- (handle,) = self._readStruct(">L") - log_debug("## Reference handle: 0x{0:X}".format(handle), ident) - ref = self.references[handle - self.BASE_REFERENCE_IDX] - log_debug("###-> Type: {0} - Value: {1}".format(type(ref), ref), ident) - return ref - - @staticmethod - def do_null(parent=None, ident=0): - """ - Handles a TC_NULL opcode - - :param parent: - :param ident: Log indentation level - :return: Always None - """ - return None - - def do_enum(self, parent=None, ident=0): - """ - Handles a TC_ENUM opcode - - :param parent: - :param ident: Log indentation level - :return: A JavaEnum object - """ - # TC_ENUM classDesc newHandle enumConstantName - enum = JavaEnum() - _, classdesc = self._read_and_exec_opcode( - ident=ident + 1, - expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC, - self.TC_NULL, self.TC_REFERENCE)) - enum.classdesc = classdesc - self._add_reference(enum, ident) - _, enumConstantName = self._read_and_exec_opcode( - ident=ident + 1, expect=(self.TC_STRING, self.TC_REFERENCE)) - enum.constant = enumConstantName - return enum - - @staticmethod - def _create_hexdump(src, start_offset=0, length=16): - """ - Prepares an hexadecimal dump string - - :param src: A string containing binary data - :param start_offset: The start offset of the source - :param length: Length of a dump line - :return: A dump string - """ - FILTER = ''.join((len(repr(chr(x))) == 3) and chr(x) or '.' 
- for x in range(256)) - pattern = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3) - - # Convert raw data to str (Python 3 compatibility) - src = to_str(src, 'latin-1') - - result = [] - for i in range(0, len(src), length): - s = src[i:i + length] - hexa = ' '.join("{0:02X}".format(ord(x)) for x in s) - printable = s.translate(FILTER) - result.append(pattern.format(i + start_offset, hexa, printable)) - - return ''.join(result) - - def _read_value(self, field_type, ident, name=""): - """ - Reads the next value, of the given type - - :param field_type: A serialization typecode - :param ident: Log indentation - :param name: Field name (for logs) - :return: The read value - :raise RuntimeError: Unknown field type - """ - if len(field_type) > 1: - # We don't need details for arrays and objects - field_type = field_type[0] - - if field_type == self.TYPE_BOOLEAN: - (val,) = self._readStruct(">B") - res = bool(val) - elif field_type == self.TYPE_BYTE: - (res,) = self._readStruct(">b") - elif field_type == self.TYPE_CHAR: - # TYPE_CHAR is defined by the serialization specification - # but not used in the implementation, so this is - # a hypothetical code - res = bytes(self._readStruct(">bb")).decode("utf-16-be") - elif field_type == self.TYPE_SHORT: - (res,) = self._readStruct(">h") - elif field_type == self.TYPE_INTEGER: - (res,) = self._readStruct(">i") - elif field_type == self.TYPE_LONG: - (res,) = self._readStruct(">q") - elif field_type == self.TYPE_FLOAT: - (res,) = self._readStruct(">f") - elif field_type == self.TYPE_DOUBLE: - (res,) = self._readStruct(">d") - elif field_type == self.TYPE_OBJECT or field_type == self.TYPE_ARRAY: - _, res = self._read_and_exec_opcode(ident=ident + 1) - else: - raise RuntimeError("Unknown typecode: {0}".format(field_type)) - - log_debug("* {0} {1}: {2}".format(field_type, name, res), ident) - return res - - def _convert_char_to_type(self, type_char): - """ - Ensures a read character is a typecode. 
- - :param type_char: Read typecode - :return: The typecode as a string (using chr) - :raise RuntimeError: Unknown typecode - """ - typecode = type_char - if type(type_char) is int: - typecode = chr(type_char) - - if typecode in self.TYPECODES_LIST: - return typecode - else: - raise RuntimeError("Typecode {0} ({1}) isn't supported." - .format(type_char, typecode)) - - def _add_reference(self, obj, ident=0): - """ - Adds a read reference to the marshaler storage - - :param obj: Reference to add - :param ident: Log indentation level - """ - log_debug("## New reference handle 0x{0:X}: {1} -> {2}" - .format(len(self.references) + self.BASE_REFERENCE_IDX, - type(obj).__name__, obj), ident) - self.references.append(obj) - - def _oops_dump_state(self, ignore_remaining_data=False): - """ - Log a deserialization error - - :param ignore_remaining_data: If True, don't log an error when - unused trailing bytes are remaining - """ - log_error("==Oops state dump" + "=" * (30 - 17)) - log_error("References: {0}".format(self.references)) - log_error("Stream seeking back at -16 byte (2nd line is an actual " - "position!):") - - # Do not use a keyword argument - self.object_stream.seek(-16, os.SEEK_CUR) - position = self.object_stream.tell() - the_rest = self.object_stream.read() - - if not ignore_remaining_data and len(the_rest): - log_error( - "Warning!!!!: Stream still has {0} bytes left:\n{1}".format( - len(the_rest), self._create_hexdump(the_rest, position))) - - log_error("=" * 30) - -# ------------------------------------------------------------------------------ - - -class JavaObjectMarshaller(JavaObjectConstants): - """ - Serializes objects into Java serialization format - """ - def __init__(self, stream=None): - """ - Sets up members - - :param stream: An output stream - """ - self.object_stream = stream - self.object_obj = None - self.object_transformers = [] - self.references = [] - - def add_transformer(self, transformer): - """ - Appends an object transformer to the 
serialization process - - :param transformer: An object with a transform(obj) method - """ - self.object_transformers.append(transformer) - - def dump(self, obj): - """ - Dumps the given object in the Java serialization format - """ - self.references = [] - self.object_obj = obj - self.object_stream = BytesIO() - self._writeStreamHeader() - self.writeObject(obj) - return self.object_stream.getvalue() - - def _writeStreamHeader(self): - """ - Writes the Java serialization magic header in the serialization stream - """ - self._writeStruct(">HH", 4, (self.STREAM_MAGIC, self.STREAM_VERSION)) - - def writeObject(self, obj): - """ - Appends an object to the serialization stream - - :param obj: A string or a deserialized Java object - :raise RuntimeError: Unsupported type - """ - log_debug("Writing object of type {0}".format(type(obj).__name__)) - if isinstance(obj, JavaArray): - # Deserialized Java array - self.write_array(obj) - elif isinstance(obj, JavaEnum): - # Deserialized Java Enum - self.write_enum(obj) - elif isinstance(obj, JavaObject): - # Deserialized Java object - self.write_object(obj) - elif isinstance(obj, JavaString): - # Deserialized String - self.write_string(obj) - elif isinstance(obj, JavaClass): - # Java class - self.write_class(obj) - elif obj is None: - # Null - self.write_null() - elif type(obj) is str: - # String value - self.write_blockdata(obj) - else: - # Unhandled type - raise RuntimeError("Object serialization of type {0} is not " - "supported.".format(type(obj))) - - def _writeStruct(self, unpack, length, args): - """ - Appends data to the serialization stream - - :param unpack: Struct format string - :param length: Unused - :param args: Struct arguments - """ - ba = struct.pack(unpack, *args) - self.object_stream.write(ba) - - def _writeString(self, obj, use_reference=True): - """ - Appends a string to the serialization stream - - :param obj: String to serialize - :param use_reference: If True, allow writing a reference - """ - # TODO: 
Convert to "modified UTF-8" - # http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8 - string = to_bytes(obj, "utf-8") - - if use_reference and isinstance(obj, JavaString): - try: - idx = self.references.index(obj) - except ValueError: - # First appearance of the string - self.references.append(obj) - logging.debug( - "*** Adding ref 0x%X for string: %s", - len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj) - - self._writeStruct(">H", 2, (len(string),)) - self.object_stream.write(string) - else: - # Write a reference to the previous type - logging.debug("*** Reusing ref 0x%X for string: %s", - idx + self.BASE_REFERENCE_IDX, obj) - self.write_reference(idx) - else: - self._writeStruct(">H", 2, (len(string),)) - self.object_stream.write(string) - - def write_string(self, obj, use_reference=True): - """ - Writes a Java string with the TC_STRING type marker - - :param obj: The string to print - :param use_reference: If True, allow writing a reference - """ - if use_reference and isinstance(obj, JavaString): - try: - idx = self.references.index(obj) - except ValueError: - # String is not referenced: let _writeString store it - self._writeStruct(">B", 1, (self.TC_STRING,)) - self._writeString(obj, use_reference) - else: - # Reuse the referenced string - logging.debug("*** Reusing ref 0x%X for String: %s", - idx + self.BASE_REFERENCE_IDX, obj) - self.write_reference(idx) - else: - # Don't use references - self._writeStruct(">B", 1, (self.TC_STRING,)) - self._writeString(obj, use_reference) - - def write_enum(self, obj): - """ - Writes an Enum value - - :param obj: A JavaEnum object - """ - # FIXME: the output doesn't have the same references as the real - # serializable form - self._writeStruct(">B", 1, (self.TC_ENUM,)) - - try: - idx = self.references.index(obj) - except ValueError: - # New reference - self.references.append(obj) - logging.debug( - "*** Adding ref 0x%X for enum: %s", - len(self.references) - 1 + self.BASE_REFERENCE_IDX, 
obj) - - self.write_classdesc(obj.get_class()) - else: - self.write_reference(idx) - - self.write_string(obj.constant) - - def write_blockdata(self, obj, parent=None): - """ - Appends a block of data to the serialization stream - - :param obj: String form of the data block - """ - if type(obj) is str: - # Latin-1: keep bytes as is - obj = to_bytes(obj, "latin-1") - - length = len(obj) - if length <= 256: - # Small block data - # TC_BLOCKDATA (unsigned byte) (byte)[size] - self._writeStruct(">B", 1, (self.TC_BLOCKDATA,)) - self._writeStruct(">B", 1, (length,)) - else: - # Large block data - # TC_BLOCKDATALONG (unsigned int) (byte)[size] - self._writeStruct(">B", 1, (self.TC_BLOCKDATALONG,)) - self._writeStruct(">I", 1, (length,)) - - self.object_stream.write(obj) - - def write_null(self): - """ - Writes a "null" value - """ - self._writeStruct(">B", 1, (self.TC_NULL,)) - - def write_object(self, obj, parent=None): - """ - Writes an object header to the serialization stream - - :param obj: Not yet used - :param parent: Not yet used - """ - # Transform object - for transformer in self.object_transformers: - tmp_object = transformer.transform(obj) - if tmp_object is not obj: - obj = tmp_object - break - - self._writeStruct(">B", 1, (self.TC_OBJECT,)) - cls = obj.get_class() - self.write_classdesc(cls) - - # Add reference - self.references.append([]) - logging.debug( - "*** Adding ref 0x%X for object %s", - len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj) - - all_names = collections.deque() - all_types = collections.deque() - tmpcls = cls - while tmpcls: - all_names.extendleft(reversed(tmpcls.fields_names)) - all_types.extendleft(reversed(tmpcls.fields_types)) - tmpcls = tmpcls.superclass - del tmpcls - - logging.debug("<=> Field names: %s", all_names) - logging.debug("<=> Field types: %s", all_types) - - for field_name, field_type in zip(all_names, all_types): - try: - logging.debug("Writing field %s (%s): %s", - field_name, field_type, getattr(obj, 
field_name)) - self._write_value(field_type, getattr(obj, field_name)) - except AttributeError as ex: - log_error("No attribute {0} for object {1}\nDir: {2}" - .format(ex, repr(obj), dir(obj))) - raise - del all_names, all_types - - if cls.flags & self.SC_SERIALIZABLE \ - and cls.flags & self.SC_WRITE_METHOD \ - or cls.flags & self.SC_EXTERNALIZABLE \ - and cls.flags & self.SC_BLOCK_DATA: - for annotation in obj.annotations: - log_debug("Write annotation {0} for {1}" - .format(repr(annotation), repr(obj))) - if annotation is None: - self.write_null() - else: - self.writeObject(annotation) - self._writeStruct('>B', 1, (self.TC_ENDBLOCKDATA,)) - - def write_class(self, obj, parent=None): - """ - Writes a class to the stream - - :param obj: A JavaClass object - :param parent: - """ - self._writeStruct(">B", 1, (self.TC_CLASS,)) - self.write_classdesc(obj) - - def write_classdesc(self, obj, parent=None): - """ - Writes a class description - - :param obj: Class description to write - :param parent: - """ - if obj not in self.references: - # Add reference - self.references.append(obj) - logging.debug( - "*** Adding ref 0x%X for classdesc %s", - len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj.name) - - self._writeStruct(">B", 1, (self.TC_CLASSDESC,)) - self._writeString(obj.name) - self._writeStruct(">qB", 1, (obj.serialVersionUID, obj.flags)) - self._writeStruct(">H", 1, (len(obj.fields_names),)) - - for field_name, field_type \ - in zip(obj.fields_names, obj.fields_types): - self._writeStruct( - ">B", 1, (self._convert_type_to_char(field_type),)) - self._writeString(field_name) - if field_type[0] in (self.TYPE_OBJECT, self.TYPE_ARRAY): - try: - idx = self.references.index(field_type) - except ValueError: - # First appearance of the type - self.references.append(field_type) - logging.debug( - "*** Adding ref 0x%X for field type %s", - len(self.references) - 1 + self.BASE_REFERENCE_IDX, - field_type) - - self.write_string(field_type, False) - else: - # Write a 
reference to the previous type - logging.debug("*** Reusing ref 0x%X for %s (%s)", - idx + self.BASE_REFERENCE_IDX, - field_type, field_name) - self.write_reference(idx) - - self._writeStruct(">B", 1, (self.TC_ENDBLOCKDATA,)) - if obj.superclass: - self.write_classdesc(obj.superclass) - else: - self.write_null() - else: - # Use reference - self.write_reference(self.references.index(obj)) - - def write_reference(self, ref_index): - """ - Writes a reference - :param ref_index: Local index (0-based) to the reference - """ - self._writeStruct( - ">BL", 1, (self.TC_REFERENCE, ref_index + self.BASE_REFERENCE_IDX)) - - def write_array(self, obj): - """ - Writes a JavaArray - - :param obj: A JavaArray object - """ - classdesc = obj.get_class() - self._writeStruct(">B", 1, (self.TC_ARRAY,)) - self.write_classdesc(classdesc) - self._writeStruct(">i", 1, (len(obj),)) - - # Add reference - self.references.append(obj) - logging.debug( - "*** Adding ref 0x%X for array []", - len(self.references) - 1 + self.BASE_REFERENCE_IDX) - - type_char = classdesc.name[0] - assert type_char == self.TYPE_ARRAY - type_char = classdesc.name[1] - - if type_char == self.TYPE_OBJECT: - for o in obj: - self._write_value(classdesc.name[1:], o) - elif type_char == self.TYPE_ARRAY: - for a in obj: - self.write_array(a) - else: - log_debug("Write array of type %s" % type_char) - for v in obj: - log_debug("Writing: %s" % v) - self._write_value(type_char, v) - - def _write_value(self, field_type, value): - """ - Writes an item of an array - - :param field_type: Value type - :param value: The value itself - """ - if len(field_type) > 1: - # We don't need details for arrays and objects - field_type = field_type[0] - - if field_type == self.TYPE_BOOLEAN: - self._writeStruct(">B", 1, (1 if value else 0,)) - elif field_type == self.TYPE_BYTE: - self._writeStruct(">b", 1, (value,)) - elif field_type == self.TYPE_SHORT: - self._writeStruct(">h", 1, (value,)) - elif field_type == self.TYPE_INTEGER: - 
self._writeStruct(">i", 1, (value,)) - elif field_type == self.TYPE_LONG: - self._writeStruct(">q", 1, (value,)) - elif field_type == self.TYPE_FLOAT: - self._writeStruct(">f", 1, (value,)) - elif field_type == self.TYPE_DOUBLE: - self._writeStruct(">d", 1, (value,)) - elif field_type == self.TYPE_OBJECT or field_type == self.TYPE_ARRAY: - if value is None: - self.write_null() - elif isinstance(value, JavaEnum): - self.write_enum(value) - elif isinstance(value, (JavaArray, JavaByteArray)): - self.write_array(value) - elif isinstance(value, JavaObject): - self.write_object(value) - elif isinstance(value, JavaString): - self.write_string(value) - elif isinstance(value, str): - self.write_blockdata(value) - else: - raise RuntimeError("Unknown typecode: {0}".format(field_type)) - else: - raise RuntimeError("Unknown typecode: {0}".format(field_type)) - - def _convert_type_to_char(self, type_char): - """ - Converts the given type code to an int - - :param type_char: A type code character - """ - typecode = type_char - if type(type_char) is int: - typecode = chr(type_char) - - if typecode in self.TYPECODES_LIST: - return ord(typecode) - elif len(typecode) > 1: - if typecode[0] == 'L': - return ord(self.TYPE_OBJECT) - elif typecode[0] == '[': - return ord(self.TYPE_ARRAY) - - raise RuntimeError("Typecode {0} ({1}) isn't supported." - .format(type_char, typecode)) - -# ------------------------------------------------------------------------------ - - -class DefaultObjectTransformer(object): - """ - Default transformer for the deserialized objects. - Converts JavaObject objects to Python types (maps, lists, ...) 
- """ - class JavaList(list, JavaObject): - """ - Python-Java list bridge type - """ - def __init__(self, unmarshaller): - # type: (JavaObjectUnmarshaller) -> None - list.__init__(self) - JavaObject.__init__(self) - - def __extra_loading__(self, unmarshaller, ident=0): - # type: (JavaObjectUnmarshaller, int) -> None - """ - Loads the content of the map, written with a custom implementation - """ - # Lists have their content in there annotations - self.extend(self.annotations[1:]) - - class JavaMap(dict, JavaObject): - """ - Python-Java dictionary/map bridge type - """ - def __init__(self, unmarshaller): - # type: (JavaObjectUnmarshaller) -> None - dict.__init__(self) - JavaObject.__init__(self) - - def __extra_loading__(self, unmarshaller, ident=0): - # type: (JavaObjectUnmarshaller, int) -> None - """ - Loads the content of the map, written with a custom implementation - """ - # Group annotation elements 2 by 2 - args = [iter(self.annotations[1:])] * 2 - for key, value in zip(*args): - self[key] = value - - class JavaLinkedHashMap(JavaMap): - def __extra_loading__(self, unmarshaller, ident=0): - # type: (JavaObjectUnmarshaller, int) -> None - """ - Loads the content of the map, written with a custom implementation - """ - # Ignore the blockdata opid - (opid,) = unmarshaller._readStruct(">B") - if opid != unmarshaller.SC_BLOCK_DATA: - raise ValueError("Start of block data not found") - - # Read HashMap fields - self.buckets = unmarshaller._read_value( - unmarshaller.TYPE_INTEGER, ident) - self.size = unmarshaller._read_value( - unmarshaller.TYPE_INTEGER, ident) - - # Read entries - for _ in range(self.size): - key = unmarshaller._read_and_exec_opcode()[1] - value = unmarshaller._read_and_exec_opcode()[1] - self[key] = value - - # Ignore the end of the blockdata - unmarshaller._read_and_exec_opcode( - ident, [unmarshaller.TC_ENDBLOCKDATA]) - - # Ignore the trailing 0 - (opid,) = unmarshaller._readStruct(">B") - if opid != 0: - raise ValueError("Should find 0x0, got 
{0:x}".format(opid)) - - TYPE_MAPPER = { - "java.util.ArrayList": JavaList, - "java.util.LinkedList": JavaList, - "java.util.HashMap": JavaMap, - "java.util.LinkedHashMap": JavaLinkedHashMap, - "java.util.TreeMap": JavaMap, - } - - def create(self, classdesc, unmarshaller=None): - # type: (JavaClass, JavaObjectUnmarshaller) -> JavaObject - """ - Transforms a deserialized Java object into a Python object - - :param classdesc: The description of a Java class - :return: The Python form of the object, or the original JavaObject - """ - try: - mapped_type = self.TYPE_MAPPER[classdesc.name] - except KeyError: - # Return a JavaObject by default - return JavaObject() - else: - log_debug("---") - log_debug(classdesc.name) - log_debug("---") - - java_object = mapped_type(unmarshaller) - - log_debug(">>> java_object: {0}".format(java_object)) - return java_object diff --git a/javaobj/__init__.py b/javaobj/__init__.py new file mode 100644 index 0000000..d1b146d --- /dev/null +++ b/javaobj/__init__.py @@ -0,0 +1,48 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Provides functions for reading and writing (writing is WIP currently) Java +objects serialized or will be deserialized by ObjectOutputStream. This form of +object representation is a standard data interchange format in Java world. + +javaobj module exposes an API familiar to users of the standard library +marshal, pickle and json modules. + +See: +http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html + +:authors: Volodymyr Buell, Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Imports giving access to what the javaobj module provides +from javaobj.v1.beans import * # noqa: F401,F403 +from javaobj.v1.core import * # noqa: F401,F403 +from javaobj.v1.transformers import * # noqa: F401,F403 + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" diff --git a/javaobj/constants.py b/javaobj/constants.py new file mode 100644 index 0000000..d4dd1cb --- /dev/null +++ b/javaobj/constants.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Definition of the constants used in the deserialization process + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import enum + +# ------------------------------------------------------------------------------ + +__all__ = ( + "PRIMITIVE_TYPES", + "StreamConstants", + "TerminalCode", + "ClassDescFlags", + "TypeCode", + "StreamCodeDebug", +) + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + + +class StreamConstants(enum.IntEnum): + """ + Basic constants of the stream protocol + """ + + # Magic bytes of any serialized files + STREAM_MAGIC = 0xACED + + # Only protocol version supported by javaobj + STREAM_VERSION = 0x05 + + # Base index for handles + BASE_REFERENCE_IDX = 0x7E0000 + + +class TerminalCode(enum.IntEnum): + """ + Stream type Codes + """ + + TC_NULL = 0x70 + TC_REFERENCE = 0x71 + TC_CLASSDESC = 0x72 + TC_OBJECT = 0x73 + TC_STRING = 0x74 + TC_ARRAY = 0x75 + TC_CLASS = 0x76 + TC_BLOCKDATA = 0x77 + TC_ENDBLOCKDATA = 0x78 + TC_RESET = 0x79 + TC_BLOCKDATALONG = 0x7A + TC_EXCEPTION = 0x7B + TC_LONGSTRING = 0x7C + TC_PROXYCLASSDESC = 0x7D + TC_ENUM = 0x7E + # Ignore TC_MAX: we don't use it and it messes with TC_ENUM + # TC_MAX = 0x7E + + +class ClassDescFlags(enum.IntEnum): + """ + Class description flags + """ + + SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE + SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE + SC_SERIALIZABLE = 0x02 + SC_EXTERNALIZABLE = 0x04 + SC_ENUM = 0x10 + + +class TypeCode(enum.IntEnum): + """ + Type definition chars (typecode) + """ + + # Primitive types + TYPE_BYTE = ord("B") # 0x42 + TYPE_CHAR = ord("C") # 0x43 + TYPE_DOUBLE = ord("D") # 0x44 + TYPE_FLOAT = ord("F") # 0x46 + TYPE_INTEGER = ord("I") # 0x49 + TYPE_LONG = ord("J") # 0x4A + TYPE_SHORT = ord("S") # 0x53 + TYPE_BOOLEAN = ord("Z") # 0x5A + # Object types + TYPE_OBJECT = ord("L") # 0x4C + TYPE_ARRAY = ord("[") # 0x5B + + +# List of the types defined as primitive 
+PRIMITIVE_TYPES = ( + TypeCode.TYPE_BYTE, + TypeCode.TYPE_CHAR, + TypeCode.TYPE_DOUBLE, + TypeCode.TYPE_FLOAT, + TypeCode.TYPE_INTEGER, + TypeCode.TYPE_LONG, + TypeCode.TYPE_SHORT, + TypeCode.TYPE_BOOLEAN, +) + + +class StreamCodeDebug: + """ + Codes utility methods + """ + + @staticmethod + def op_id(op_id): + # type: (int) -> str + """ + Returns the name of the given OP Code + :param op_id: OP Code + :return: Name of the OP Code + """ + try: + return TerminalCode(op_id).name + except ValueError: + return "".format(op_id) + + @staticmethod + def type_code(type_id): + # type: (int) -> str + """ + Returns the name of the given Type Code + :param type_id: Type code + :return: Name of the type code + """ + try: + return TypeCode(type_id).name + except ValueError: + return "".format(type_id) + + @staticmethod + def flags(flags): + # type: (int) -> str + """ + Returns the names of the class description flags found in the given + integer + + :param flags: A class description flag entry + :return: The flags names as a single string + """ + names = sorted(key.name for key in ClassDescFlags if key & flags) + return ", ".join(names) diff --git a/javaobj/modifiedutf8.py b/javaobj/modifiedutf8.py new file mode 100644 index 0000000..ac29ce5 --- /dev/null +++ b/javaobj/modifiedutf8.py @@ -0,0 +1,255 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Implements the support of the Java-specific kind of UTF-8 encoding. + +This module is a modified version of ``py2jdbc.mutf8`` provided by +`@guywithface `_. 
+ +The project the original file comes from is available at: +https://github.com/swstephe/py2jdbc/ + +:authors: Scott Stephens (@swstephe), @guywithface +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha +""" + +from __future__ import unicode_literals + +import sys + + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# Encoding name: not cesu-8, which uses a different zero-byte +NAME = "mutf8" + +# ------------------------------------------------------------------------------ + +if sys.version_info[0] >= 3: + unicode_char = chr # pylint:disable=C0103 + + def byte_to_int(data): + # type: (bytes) -> int + """ + Converts the first byte of the given data to an integer + """ + if isinstance(data, int): + return data + + if isinstance(data, bytes): + return data[0] + + raise ValueError( + "Expected byte or int as input, got: {0}".format( + type(data).__name__ + ) + ) + + +else: + unicode_char = ( + unichr # pylint:disable=C0103,undefined-variable # noqa: F821 + ) + + def byte_to_int(data): + # type: (bytes) -> int + """ + Converts the first byte of the given data to an integer + """ + if isinstance(data, int): + return data + + if isinstance(data, str): + return ord(data[0]) + + raise ValueError( + "Expected byte or int as input, got: {0}".format( + type(data).__name__ + ) + ) + + +# ------------------------------------------------------------------------------ + + +class DecodeMap(object): # pylint:disable=R0205 + """ + A utility class which manages masking, comparing and mapping in bits. + If the mask and compare fails, this will raise UnicodeDecodeError so + encode and decode will correctly handle bad characters. + """ + + def __init__(self, count, mask, value, bits): + """ + Initialize a DecodeMap, entry from a static dictionary for the module. 
+ It automatically calculates the mask for the bits for the value + (always assumed to be at the bottom of the byte). + + :param count: The number of bytes in this entire sequence. + :param mask: The mask to apply to the byte at this position. + :param value: The value of masked bits, (without shifting). + :param bits: The number of bits. + """ + self.count = count + self.mask = mask + self.value = value + self.bits = bits + self.mask2 = (1 << bits) - 1 + + def apply(self, byte, value, data, i, count): + """ + Apply mask, compare to expected value, shift and return result. + Eventually, this could become a ``reduce`` function. + + :param byte: The byte to compare + :param value: The currently accumulated value. + :param data: The data buffer, (array of bytes). + :param i: The position within the data buffer. + :param count: The position of this comparison. + :return: A new value with the bits merged in. + :raises UnicodeDecodeError: if marked bits don't match. + """ + if byte & self.mask == self.value: + value <<= self.bits + value |= byte & self.mask2 + else: + raise UnicodeDecodeError( + NAME, + data, + i, + i + count, + "invalid {}-byte sequence".format(self.count), + ) + return value + + def __repr__(self): + return "DecodeMap({})".format( + ", ".join( + "{}=0x{:02x}".format(n, getattr(self, n)) + for n in ("count", "mask", "value", "bits", "mask2") + ) + ) + + +DECODER_MAP = { + 2: ((0xC0, 0x80, 6),), + 3: ((0xC0, 0x80, 6), (0xC0, 0x80, 6)), + 6: ( + (0xF0, 0xA0, 4), + (0xC0, 0x80, 6), + (0xFF, 0xED, 0), + (0xF0, 0xB0, 4), + (0xC0, 0x80, 6), + ), +} + +DECODE_MAP = dict( + (k, tuple(DecodeMap(k, *vv) for vv in v)) for k, v in DECODER_MAP.items() +) + + +def decoder(data): + """ + This generator processes a sequence of bytes in Modified UTF-8 encoding + and produces a sequence of unicode string characters. + + It takes bits from the byte until it matches one of the known encoding + sequences. + It uses ``DecodeMap`` to mask, compare and generate values. 
+ + :param data: a string of bytes in Modified UTF-8 encoding. + :return: a generator producing a string of unicode characters + :raises UnicodeDecodeError: unrecognised byte in sequence encountered. + """ + + def next_byte(_it, start, count): + try: + return next(_it)[1] + except StopIteration: + raise UnicodeDecodeError( + NAME, data, start, start + count, "incomplete byte sequence" + ) + + it = iter(enumerate(data)) + for i, d in it: + if d == 0x00: # 00000000 + raise UnicodeDecodeError( + NAME, data, i, i + 1, "embedded zero-byte not allowed" + ) + + if d & 0x80: # 1xxxxxxx + if d & 0x40: # 11xxxxxx + if d & 0x20: # 111xxxxx + if d & 0x10: # 1111xxxx + raise UnicodeDecodeError( + NAME, data, i, i + 1, "invalid encoding character" + ) + + if d == 0xED: + value = 0 + for i1, dm in enumerate(DECODE_MAP[6]): + d1 = next_byte(it, i, i1 + 1) + value = dm.apply(d1, value, data, i, i1 + 1) + else: # 1110xxxx + value = d & 0x0F + for i1, dm in enumerate(DECODE_MAP[3]): + d1 = next_byte(it, i, i1 + 1) + value = dm.apply(d1, value, data, i, i1 + 1) + else: # 110xxxxx + value = d & 0x1F + for i1, dm in enumerate(DECODE_MAP[2]): + d1 = next_byte(it, i, i1 + 1) + value = dm.apply(d1, value, data, i, i1 + 1) + else: # 10xxxxxx + raise UnicodeDecodeError( + NAME, data, i, i + 1, "misplaced continuation character" + ) + else: # 0xxxxxxx + value = d + # noinspection PyCompatibility + yield mutf8_unichr(value) + + +def decode_modified_utf8(data, errors="strict"): + """ + Decodes a sequence of bytes to a unicode text and length using + Modified UTF-8. + This function is designed to be used with Python ``codecs`` module. + + :param data: a string of bytes in Modified UTF-8 + :param errors: handle decoding errors + :return: unicode text and length + :raises UnicodeDecodeError: sequence is invalid. 
+ """ + value, length = "", 0 + it = iter(decoder(byte_to_int(d) for d in data)) + while True: + try: + value += next(it) + length += 1 + except StopIteration: + break + except UnicodeDecodeError as e: + if errors == "strict": + raise e + + if errors == "ignore": + pass + elif errors == "replace": + value += "\uFFFD" + length += 1 + return value, length + + +def mutf8_unichr(value): + """ + Mimics Python 2 unichr() and Python 3 chr() + """ + return unicode_char(value) diff --git a/javaobj/utils.py b/javaobj/utils.py new file mode 100644 index 0000000..2d6f761 --- /dev/null +++ b/javaobj/utils.py @@ -0,0 +1,276 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Provides utility methods used by the core implementation of javaobj. + +Namely: logging methods, bytes/str/unicode converters + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +from __future__ import absolute_import + +# Standard library +from typing import IO, Tuple # noqa: F401 +import gzip +import logging +import os +import struct +import sys + +# Modified UTF-8 parser +from .modifiedutf8 import byte_to_int, decode_modified_utf8 + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + +# Setup the logger +_log = logging.getLogger("javaobj") + + +def log_debug(message, ident=0): + """ + Logs a message at debug level + + :param message: Message to log + :param ident: Number of indentation spaces + """ + _log.debug("%s%s", " " * (ident * 2), message) + + +def log_error(message, ident=0): + """ + Logs a message at error level + + :param message: Message to log + :param ident: Number of indentation spaces + """ + _log.error("%s%s", " " * (ident * 2), message) + + +# ------------------------------------------------------------------------------ + + +def read_struct(data, fmt_str): + # type: (bytes, str) -> Tuple + """ + Reads input bytes and extract the given structure. 
Returns both the read + elements and the remaining data + + :param data: Data as bytes + :param fmt_str: Struct unpack format string + :return: A tuple (results as tuple, remaining data) + """ + size = struct.calcsize(fmt_str) + return struct.unpack(fmt_str, data[:size]), data[size:] + + +def read_string(data, length_fmt="H"): + # type: (bytes, str) -> Tuple[UNICODE_TYPE, bytes] + """ + Reads a serialized string + + :param data: Bytes where to read the string from + :param length_fmt: Structure format of the string length (H or Q) + :return: The deserialized string + """ + (length,), data = read_struct(data, ">{0}".format(length_fmt)) + ba, data = data[:length], data[length:] + return to_unicode(ba), data + + +# ------------------------------------------------------------------------------ + + +def java_data_fd(original_df): + # type: (IO[bytes]) -> IO[bytes] + """ + Ensures that the input file descriptor contains a Java serialized content. + Automatically uncompresses GZipped data + + :param original_df: Input file descriptor + :return: Input file descriptor or a fake one to access uncompressed data + :raise IOError: Error reading input file + """ + # Read the first bytes + start_idx = original_df.tell() + magic_header = [byte_to_int(x) for x in original_df.read(2)] # type: ignore + original_df.seek(start_idx, os.SEEK_SET) + + if magic_header[0] == 0xAC: + # Consider we have a raw seralized stream: use it + original_df.seek(start_idx, os.SEEK_SET) + return original_df + elif magic_header[0] == 0x1F and magic_header[1] == 0x8B: + # Open the GZip file + return gzip.GzipFile(fileobj=original_df, mode="rb") # type: ignore + else: + # Let the parser raise the error + return original_df + + +# ------------------------------------------------------------------------------ + + +def hexdump(src, start_offset=0, length=16): + # type: (str, int, int) -> str + """ + Prepares an hexadecimal dump string + + :param src: A string containing binary data + :param start_offset: The 
start offset of the source + :param length: Length of a dump line + :return: A dump string + """ + hex_filter = "".join( + (len(repr(chr(x))) == 3) and chr(x) or "." for x in range(256) + ) + pattern = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3) + + # Convert raw data to str (Python 3 compatibility) + src = to_str(src, "latin-1") + + result = [] + for i in range(0, len(src), length): + s = src[i : i + length] + hexa = " ".join("{0:02X}".format(ord(x)) for x in s) + printable = s.translate(hex_filter) + result.append(pattern.format(i + start_offset, hexa, printable)) + + return "".join(result) + + +# ------------------------------------------------------------------------------ + + +if sys.version_info[0] >= 3: + BYTES_TYPE = bytes # pylint:disable=C0103 + UNICODE_TYPE = str # pylint:disable=C0103 + unicode_char = chr # pylint:disable=C0103 + + def bytes_char(c): + """ + Converts the given character to a bytes string + """ + return bytes((c,)) + + # Python 3 interpreter : bytes & str + def to_bytes(data, encoding="UTF-8"): + """ + Converts the given string to an array of bytes. + Returns the first parameter if it is already an array of bytes. + + :param data: A unicode string + :param encoding: The encoding of data + :return: The corresponding array of bytes + """ + if type(data) is bytes: # pylint:disable=C0123 + # Nothing to do + return data + return data.encode(encoding) + + def to_str(data, encoding="UTF-8"): + """ + Converts the given parameter to a string. + Returns the first parameter if it is already an instance of ``str``. 
+ + :param data: A string + :param encoding: The encoding of data + :return: The corresponding string + """ + if type(data) is str: # pylint:disable=C0123 + # Nothing to do + return data + try: + return str(data, encoding) + except UnicodeDecodeError: + return decode_modified_utf8(data)[0] + + # Same operation + to_unicode = to_str # pylint:disable=C0103 + + def read_to_str(data): + """ + Concats all bytes into a string + """ + return "".join(chr(char) for char in data) + + +else: + BYTES_TYPE = str # pylint:disable=C0103 + UNICODE_TYPE = ( + unicode # pylint:disable=C0103,undefined-variable # noqa: F821 + ) + unicode_char = ( + unichr # pylint:disable=C0103,undefined-variable # noqa: F821 + ) + bytes_char = chr # pylint:disable=C0103 + + # Python 2 interpreter : str & unicode + def to_str(data, encoding="UTF-8"): + """ + Converts the given parameter to a string. + Returns the first parameter if it is already an instance of ``str``. + + :param data: A string + :param encoding: The encoding of data + :return: The corresponding string + """ + if type(data) is str: # pylint:disable=C0123 + # Nothing to do + return data + return data.encode(encoding) + + # Same operation + to_bytes = to_str # pylint:disable=C0103 + + # Python 2 interpreter : str & unicode + def to_unicode(data, encoding="UTF-8"): + """ + Converts the given parameter to a string. + Returns the first parameter if it is already an instance of ``str``. 
+ + :param data: A string + :param encoding: The encoding of data + :return: The corresponding string + """ + if type(data) is UNICODE_TYPE: # pylint:disable=C0123 + # Nothing to do + return data + try: + return data.decode(encoding) + except UnicodeDecodeError: + return decode_modified_utf8(data)[0] + + def read_to_str(data): + """ + Nothing to do in Python 2 + """ + return data diff --git a/javaobj/v1/__init__.py b/javaobj/v1/__init__.py new file mode 100644 index 0000000..cc4aaaa --- /dev/null +++ b/javaobj/v1/__init__.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +""" +First version of the un-marshalling process of javaobj. + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from . 
import beans, core, transformers # noqa: F401 +from .core import ( # noqa: F401 + load, + loads, + dumps, + JavaObjectMarshaller, + JavaObjectUnmarshaller, +) +from .transformers import DefaultObjectTransformer # noqa: F401 + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" diff --git a/javaobj/v1/beans.py b/javaobj/v1/beans.py new file mode 100644 index 0000000..bf867bb --- /dev/null +++ b/javaobj/v1/beans.py @@ -0,0 +1,225 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Definition of the beans of the v1 parser + +:authors: Volodymyr Buell, Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +from __future__ import absolute_import + +from typing import List +import struct + +from ..utils import UNICODE_TYPE + +# ------------------------------------------------------------------------------ + +__all__ = ( + "JavaArray", + "JavaByteArray", + "JavaClass", + "JavaEnum", + "JavaObject", + "JavaString", +) + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + + +class JavaClass(object): # pylint:disable=R0205 + """ + Represents a class in the Java world + """ + + def __init__(self): + """ + Sets up members + """ + self.name = None # type: str + self.serialVersionUID = None # type: int # pylint:disable=C0103 + self.flags = None # type: int + self.fields_names = [] # type: List[str] + self.fields_types = [] # type: List[JavaString] + self.superclass = None # type: JavaClass + + def __str__(self): + """ + String representation of the Java class + """ + return self.__repr__() + + def __repr__(self): + """ + String representation of the Java class + """ + return "[{0:s}:0x{1:X}]".format(self.name, self.serialVersionUID) + + def __eq__(self, other): + """ + Equality test between two Java classes + + :param other: Other JavaClass to test + :return: True if both classes share the same fields and name + """ + if not isinstance(other, type(self)): + return False + + return ( + self.name == other.name + and self.serialVersionUID == other.serialVersionUID + and self.flags == other.flags + and self.fields_names == other.fields_names + and self.fields_types == other.fields_types + and self.superclass == other.superclass + ) + + +class JavaObject(object): # pylint:disable=R0205 + """ + Represents a deserialized non-primitive Java object + """ + + def __init__(self): + """ + Sets up members + """ + self.classdesc = None # type: JavaClass + self.annotations = 
[] + + def get_class(self): + """ + Returns the JavaClass that defines the type of this object + """ + return self.classdesc + + def __str__(self): + """ + String representation + """ + return self.__repr__() + + def __repr__(self): + """ + String representation + """ + name = "UNKNOWN" + if self.classdesc: + name = self.classdesc.name + return "".format(name) + + def __hash__(self): + """ + Each JavaObject we load must have a hash method to be accepted in sets + and alike. The default hash is the memory address of the object. + """ + return id(self) + + def __eq__(self, other): + """ + Equality test between two Java classes + + :param other: Other JavaClass to test + :return: True if both classes share the same fields and name + """ + if not isinstance(other, type(self)): + return False + + res = ( + self.classdesc == other.classdesc + and self.annotations == other.annotations + ) + if not res: + return False + + for name in self.classdesc.fields_names: + if not getattr(self, name) == getattr(other, name): + return False + return True + + +class JavaString(UNICODE_TYPE): + """ + Represents a Java String + """ + + def __hash__(self): + return UNICODE_TYPE.__hash__(self) + + def __eq__(self, other): + if not isinstance(other, UNICODE_TYPE): + return False + return UNICODE_TYPE.__eq__(self, other) + + +class JavaEnum(JavaObject): + """ + Represents a Java enumeration + """ + + def __init__(self, constant=None): + super(JavaEnum, self).__init__() + self.constant = constant + + +class JavaArray(list, JavaObject): + """ + Represents a Java Array + """ + + def __init__(self, classdesc=None): + list.__init__(self) + JavaObject.__init__(self) + self.classdesc = classdesc + + def __hash__(self): + return list.__hash__(self) + + +class JavaByteArray(JavaObject): + """ + Represents the special case of Java Array which contains bytes + """ + + def __init__(self, data, classdesc=None): + JavaObject.__init__(self) + self._data = struct.unpack("b" * len(data), data) + 
self.classdesc = classdesc + + def __str__(self): + return "JavaByteArray({0})".format(self._data) + + def __getitem__(self, item): + return self._data[item] + + def __iter__(self): + return iter(self._data) + + def __len__(self): + return len(self._data) diff --git a/javaobj/v1/core.py b/javaobj/v1/core.py new file mode 100644 index 0000000..ae5eeb5 --- /dev/null +++ b/javaobj/v1/core.py @@ -0,0 +1,133 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Provides functions for reading and writing (writing is WIP currently) Java +objects serialized or will be deserialized by ObjectOutputStream. This form of +object representation is a standard data interchange format in Java world. + +javaobj module exposes an API familiar to users of the standard library +marshal, pickle and json modules. + +See: +http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html + +:authors: Volodymyr Buell, Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
def load(file_object, *transformers, **kwargs):
    """
    Reads the Java primitive data and objects, serialized with an
    ObjectOutputStream, found in the given file-like object.

    :param file_object: A file-like object
    :param transformers: Custom transformers, registered before the default
                         one
    :param ignore_remaining_data: If True, don't log an error when unused
                                  trailing bytes are remaining
    :param use_numpy_arrays: Flag given as second argument to the
                             unmarshaller (False by default)
    :return: The deserialized object
    """
    # Optional keyword arguments
    use_numpy = kwargs.get("use_numpy_arrays", False)
    skip_trailing_check = kwargs.get("ignore_remaining_data", False)

    # Check the file format (uncompress if necessary)
    stream = java_data_fd(file_object)
    unmarshaller = JavaObjectUnmarshaller(stream, use_numpy)

    # Custom transformers come first, the default one is always the last
    for custom_transformer in transformers:
        unmarshaller.add_transformer(custom_transformer)

    default_transformer = DefaultObjectTransformer()
    unmarshaller.add_transformer(default_transformer)

    # Parse the content of the stream
    result = unmarshaller.readObject(ignore_remaining_data=skip_trailing_check)
    return result
+ + :param obj: A Python primitive object, or one loaded using load(s) + :param transformers: Custom transformers to use + :return: The serialized data as a string + """ + marshaller = JavaObjectMarshaller() + # Add custom transformers + for transformer in transformers: + marshaller.add_transformer(transformer) + + return marshaller.dump(obj) diff --git a/javaobj/v1/marshaller.py b/javaobj/v1/marshaller.py new file mode 100644 index 0000000..9e5bdeb --- /dev/null +++ b/javaobj/v1/marshaller.py @@ -0,0 +1,574 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Provides functions for writing (writing is WIP currently) Java +objects that will be deserialized by ObjectOutputStream. This form of +object representation is a standard data interchange format in Java world. + +javaobj module exposes an API familiar to users of the standard library +marshal, pickle and json modules. + +See: +http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html + +:authors: Volodymyr Buell, Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
class JavaObjectMarshaller:
    """
    Serializes objects into Java serialization format

    The marshaller keeps the list of the items already written
    (``references``): the index of an item in that list, shifted by
    ``StreamConstants.BASE_REFERENCE_IDX``, is the handle used to write a
    reference to it.
    """

    def __init__(self, stream=None):
        """
        Sets up members

        :param stream: An output stream
        """
        self.object_stream = stream
        self.object_obj = None
        self.object_transformers = []
        self.references = []

    def add_transformer(self, transformer):
        """
        Appends an object transformer to the serialization process

        :param transformer: An object with a transform(obj) method
        """
        self.object_transformers.append(transformer)

    def dump(self, obj):
        """
        Dumps the given object in the Java serialization format

        The references and the output stream are reset on each call.

        :param obj: A string or a deserialized Java object
        :return: The whole content of the serialization stream
        :raise RuntimeError: Unsupported type
        """
        self.references = []
        self.object_obj = obj
        self.object_stream = BytesIO()
        self._writeStreamHeader()
        self.writeObject(obj)
        return self.object_stream.getvalue()

    def _writeStreamHeader(self):  # pylint:disable=C0103
        """
        Writes the Java serialization magic header in the serialization stream
        """
        self._writeStruct(
            ">HH",
            4,
            (StreamConstants.STREAM_MAGIC, StreamConstants.STREAM_VERSION),
        )

    def writeObject(self, obj):  # pylint:disable=C0103
        """
        Appends an object to the serialization stream

        The type tests are ordered: arrays and enumerations are checked
        before the plain JavaObject test.

        :param obj: A string or a deserialized Java object
        :raise RuntimeError: Unsupported type
        """
        log_debug("Writing object of type {0}".format(type(obj).__name__))
        if isinstance(obj, JavaArray):
            # Deserialized Java array
            self.write_array(obj)
        elif isinstance(obj, JavaByteArray):
            # Deserialized Java byte array
            self.write_array(obj)
        elif isinstance(obj, JavaEnum):
            # Deserialized Java Enum
            self.write_enum(obj)
        elif isinstance(obj, JavaObject):
            # Deserialized Java object
            self.write_object(obj)
        elif isinstance(obj, JavaString):
            # Deserialized String
            self.write_string(obj)
        elif isinstance(obj, JavaClass):
            # Java class
            self.write_class(obj)
        elif obj is None:
            # Null
            self.write_null()
        elif type(obj) is str:  # pylint:disable=C0123
            # String value
            self.write_blockdata(obj)
        else:
            # Unhandled type
            raise RuntimeError(
                "Object serialization of type {0} is not "
                "supported.".format(type(obj))
            )

    def _writeStruct(self, unpack, length, args):  # pylint:disable=C0103
        """
        Appends data to the serialization stream

        :param unpack: Struct format string
        :param length: Unused
        :param args: Struct arguments
        """
        ba = struct.pack(unpack, *args)
        self.object_stream.write(ba)

    def _writeString(self, obj, use_reference=True):  # pylint:disable=C0103
        """
        Appends a string to the serialization stream, as a 2-bytes length
        followed by the UTF-8 encoded content

        :param obj: String to serialize
        :param use_reference: If True, allow writing a reference
        """
        # TODO: Convert to "modified UTF-8"
        # http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8
        string = to_bytes(obj, "utf-8")

        if use_reference and isinstance(obj, JavaString):
            try:
                idx = self.references.index(obj)
            except ValueError:
                # First appearance of the string
                self.references.append(obj)
                logging.debug(
                    "*** Adding ref 0x%X for string: %s",
                    len(self.references)
                    - 1
                    + StreamConstants.BASE_REFERENCE_IDX,
                    obj,
                )

                self._writeStruct(">H", 2, (len(string),))
                self.object_stream.write(string)
            else:
                # Write a reference to the previous type
                logging.debug(
                    "*** Reusing ref 0x%X for string: %s",
                    idx + StreamConstants.BASE_REFERENCE_IDX,
                    obj,
                )
                self.write_reference(idx)
        else:
            self._writeStruct(">H", 2, (len(string),))
            self.object_stream.write(string)

    def write_string(self, obj, use_reference=True):
        """
        Writes a Java string with the TC_STRING type marker

        :param obj: The string to print
        :param use_reference: If True, allow writing a reference
        """
        if use_reference and isinstance(obj, JavaString):
            try:
                idx = self.references.index(obj)
            except ValueError:
                # String is not referenced: let _writeString store it
                self._writeStruct(">B", 1, (TerminalCode.TC_STRING,))
                self._writeString(obj, use_reference)
            else:
                # Reuse the referenced string
                logging.debug(
                    "*** Reusing ref 0x%X for String: %s",
                    idx + StreamConstants.BASE_REFERENCE_IDX,
                    obj,
                )
                self.write_reference(idx)
        else:
            # Don't use references
            self._writeStruct(">B", 1, (TerminalCode.TC_STRING,))
            self._writeString(obj, use_reference)

    def write_enum(self, obj):
        """
        Writes an Enum value

        :param obj: A JavaEnum object
        """
        # FIXME: the output doesn't have the same references as the real
        # serializable form
        self._writeStruct(">B", 1, (TerminalCode.TC_ENUM,))

        try:
            idx = self.references.index(obj)
        except ValueError:
            # New reference
            self.references.append(obj)
            logging.debug(
                "*** Adding ref 0x%X for enum: %s",
                len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
                obj,
            )

            self.write_classdesc(obj.get_class())
        else:
            self.write_reference(idx)

        # The name of the constant is written in both cases
        self.write_string(obj.constant)

    def write_blockdata(self, obj, parent=None):  # pylint:disable=W0613
        """
        Appends a block of data to the serialization stream

        Blocks of at most 255 bytes are written in the short form (size on
        one unsigned byte), bigger ones in the long form (size on 4 bytes).

        :param obj: String form of the data block
        :param parent: Unused
        """
        if isinstance(obj, UNICODE_TYPE):
            # Latin-1: keep bytes as is
            obj = to_bytes(obj, "latin-1")

        length = len(obj)
        if length < 256:
            # Small block data: the size must fit in a single unsigned byte,
            # i.e. 255 at most (256 can't be packed with ">B")
            # TC_BLOCKDATA (unsigned byte)<size> (byte)[size]
            self._writeStruct(">B", 1, (TerminalCode.TC_BLOCKDATA,))
            self._writeStruct(">B", 1, (length,))
        else:
            # Large block data
            # TC_BLOCKDATALONG (unsigned int)<size> (byte)[size]
            self._writeStruct(">B", 1, (TerminalCode.TC_BLOCKDATALONG,))
            self._writeStruct(">I", 1, (length,))

        self.object_stream.write(obj)

    def write_null(self):
        """
        Writes a "null" value
        """
        self._writeStruct(">B", 1, (TerminalCode.TC_NULL,))

    def write_object(self, obj, parent=None):
        """
        Writes an object (header, fields and annotations) to the
        serialization stream

        :param obj: The JavaObject to write
        :param parent: Not yet used
        :raise AttributeError: The object misses one of the fields declared
                               by its class description
        """
        # Transform object: the first transformer returning something else
        # than the given object wins
        for transformer in self.object_transformers:
            tmp_object = transformer.transform(obj)
            if tmp_object is not obj:
                obj = tmp_object
                break

        self._writeStruct(">B", 1, (TerminalCode.TC_OBJECT,))
        cls = obj.get_class()
        self.write_classdesc(cls)

        # Add reference
        # (an empty list is stored in that slot, not the object itself)
        self.references.append([])
        logging.debug(
            "*** Adding ref 0x%X for object %s",
            len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
            obj,
        )

        # Collect the fields of the whole hierarchy: the fields of a super
        # class are placed before those of its children
        all_names = collections.deque()
        all_types = collections.deque()
        tmpcls = cls
        while tmpcls:
            all_names.extendleft(reversed(tmpcls.fields_names))
            all_types.extendleft(reversed(tmpcls.fields_types))
            tmpcls = tmpcls.superclass
        del tmpcls

        logging.debug("<=> Field names: %s", all_names)
        logging.debug("<=> Field types: %s", all_types)

        for field_name, field_type in zip(all_names, all_types):
            try:
                logging.debug(
                    "Writing field %s (%s): %s",
                    field_name,
                    field_type,
                    getattr(obj, field_name),
                )
                self._write_value(field_type, getattr(obj, field_name))
            except AttributeError as ex:
                log_error(
                    "No attribute {0} for object {1}\nDir: {2}".format(
                        ex, repr(obj), dir(obj)
                    )
                )
                raise
        del all_names, all_types

        # Annotations are written only for those two combinations of flags
        if (
            cls.flags & ClassDescFlags.SC_SERIALIZABLE
            and cls.flags & ClassDescFlags.SC_WRITE_METHOD
            or cls.flags & ClassDescFlags.SC_EXTERNALIZABLE
            and cls.flags & ClassDescFlags.SC_BLOCK_DATA
        ):
            for annotation in obj.annotations:
                log_debug(
                    "Write annotation {0} for {1}".format(
                        repr(annotation), repr(obj)
                    )
                )
                if annotation is None:
                    self.write_null()
                else:
                    self.writeObject(annotation)
            self._writeStruct(">B", 1, (TerminalCode.TC_ENDBLOCKDATA,))

    def write_class(self, obj, parent=None):  # pylint:disable=W0613
        """
        Writes a class to the stream

        :param obj: A JavaClass object
        :param parent: Unused
        """
        self._writeStruct(">B", 1, (TerminalCode.TC_CLASS,))
        self.write_classdesc(obj)

    def write_classdesc(self, obj, parent=None):  # pylint:disable=W0613
        """
        Writes a class description, or a reference to it if it has already
        been written

        :param obj: Class description to write
        :param parent: Unused
        """
        if obj not in self.references:
            # Add reference
            self.references.append(obj)
            logging.debug(
                "*** Adding ref 0x%X for classdesc %s",
                len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
                obj.name,
            )

            self._writeStruct(">B", 1, (TerminalCode.TC_CLASSDESC,))
            self._writeString(obj.name)
            self._writeStruct(">qB", 1, (obj.serialVersionUID, obj.flags))
            self._writeStruct(">H", 1, (len(obj.fields_names),))

            for field_name, field_type in zip(
                obj.fields_names, obj.fields_types
            ):
                self._writeStruct(
                    ">B", 1, (self._convert_type_to_char(field_type),)
                )
                self._writeString(field_name)
                # The full type name is written for objects and arrays only
                if ord(field_type[0]) in (
                    TypeCode.TYPE_OBJECT,
                    TypeCode.TYPE_ARRAY,
                ):
                    try:
                        idx = self.references.index(field_type)
                    except ValueError:
                        # First appearance of the type
                        self.references.append(field_type)
                        logging.debug(
                            "*** Adding ref 0x%X for field type %s",
                            len(self.references)
                            - 1
                            + StreamConstants.BASE_REFERENCE_IDX,
                            field_type,
                        )

                        self.write_string(field_type, False)
                    else:
                        # Write a reference to the previous type
                        logging.debug(
                            "*** Reusing ref 0x%X for %s (%s)",
                            idx + StreamConstants.BASE_REFERENCE_IDX,
                            field_type,
                            field_name,
                        )
                        self.write_reference(idx)

            self._writeStruct(">B", 1, (TerminalCode.TC_ENDBLOCKDATA,))
            if obj.superclass:
                self.write_classdesc(obj.superclass)
            else:
                self.write_null()
        else:
            # Use reference
            self.write_reference(self.references.index(obj))

    def write_reference(self, ref_index):
        """
        Writes a reference

        :param ref_index: Local index (0-based) to the reference
        """
        self._writeStruct(
            ">BL",
            1,
            (
                TerminalCode.TC_REFERENCE,
                ref_index + StreamConstants.BASE_REFERENCE_IDX,
            ),
        )

    def write_array(self, obj):
        """
        Writes a JavaArray or a JavaByteArray

        :param obj: A JavaArray (or JavaByteArray) object
        :raise RuntimeError: An item of the array can't be written
        """
        classdesc = obj.get_class()
        self._writeStruct(">B", 1, (TerminalCode.TC_ARRAY,))
        self.write_classdesc(classdesc)
        self._writeStruct(">i", 1, (len(obj),))

        # Add reference
        self.references.append(obj)
        logging.debug(
            "*** Adding ref 0x%X for array []",
            len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
        )

        # The class name of an array is "[" followed by the type of its items
        array_type_code = TypeCode(ord(classdesc.name[0]))
        assert array_type_code == TypeCode.TYPE_ARRAY
        type_code = TypeCode(ord(classdesc.name[1]))

        if type_code in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
            # Objects and sub-arrays go through _write_value, which handles
            # null items (a direct call to write_array fails on None)
            item_type = classdesc.name[1:]
            for item in obj:
                self._write_value(item_type, item)
        else:
            log_debug("Write array of type {0}".format(chr(type_code.value)))
            for v in obj:
                log_debug("Writing: %s" % v)
                self._write_value(type_code, v)

    def _write_value(self, raw_field_type, value):
        """
        Writes the value of a field or an item of an array

        :param raw_field_type: Value type (a TypeCode, an integer or a type
                               name, of which only the first character is
                               used)
        :param value: The value itself
        :raise RuntimeError: Unknown type code or unsupported kind of value
        """
        if isinstance(raw_field_type, (TypeCode, int)):
            field_type = raw_field_type
        else:
            # We don't need details for arrays and objects
            field_type = TypeCode(ord(raw_field_type[0]))

        if field_type == TypeCode.TYPE_BOOLEAN:
            self._writeStruct(">B", 1, (1 if value else 0,))
        elif field_type == TypeCode.TYPE_BYTE:
            self._writeStruct(">b", 1, (value,))
        elif field_type == TypeCode.TYPE_CHAR:
            self._writeStruct(">H", 1, (ord(value),))
        elif field_type == TypeCode.TYPE_SHORT:
            self._writeStruct(">h", 1, (value,))
        elif field_type == TypeCode.TYPE_INTEGER:
            self._writeStruct(">i", 1, (value,))
        elif field_type == TypeCode.TYPE_LONG:
            self._writeStruct(">q", 1, (value,))
        elif field_type == TypeCode.TYPE_FLOAT:
            self._writeStruct(">f", 1, (value,))
        elif field_type == TypeCode.TYPE_DOUBLE:
            self._writeStruct(">d", 1, (value,))
        elif field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
            if value is None:
                self.write_null()
            elif isinstance(value, JavaEnum):
                self.write_enum(value)
            elif isinstance(value, (JavaArray, JavaByteArray)):
                self.write_array(value)
            elif isinstance(value, JavaObject):
                self.write_object(value)
            elif isinstance(value, JavaString):
                self.write_string(value)
            elif isinstance(value, JavaClass):
                self.write_class(value)
            elif isinstance(value, (BYTES_TYPE, UNICODE_TYPE)):
                self.write_blockdata(value)
            else:
                # The type code is valid here: the value is the problem
                raise RuntimeError(
                    "Unsupported value of type {0} for typecode {1}".format(
                        type(value).__name__, field_type
                    )
                )
        else:
            raise RuntimeError("Unknown typecode: {0}".format(field_type))

    @staticmethod
    def _convert_type_to_char(type_char):
        """
        Converts the given type code to an int

        :param type_char: A type code character
        :return: The integer value of the type code
        :raise RuntimeError: Unsupported kind of type code
        :raise ValueError: Unknown type code character
        """
        if isinstance(type_char, TypeCode):
            return type_char.value

        if isinstance(type_char, int):
            return type_char

        if isinstance(type_char, (BYTES_TYPE, UNICODE_TYPE)):
            # Conversion to TypeCode will raise an error if the type
            # is invalid
            return TypeCode(ord(type_char[0])).value

        # Don't try to index the value here: it is neither a string nor a
        # number, so doing it would hide the RuntimeError behind a TypeError
        raise RuntimeError(
            "Typecode {0} ({1}) isn't supported.".format(
                type_char, type(type_char).__name__
            )
        )
class DefaultObjectTransformer(object):  # pylint:disable=R0205
    """
    Default transformer for the deserialized objects.
    Converts JavaObject objects to Python types (maps, lists, ...)

    The Python type to use is looked up in ``TYPE_MAPPER``, using the name
    of the Java class.
    """

    class JavaList(list, JavaObject):
        """
        Python-Java list bridge type
        """

        def __init__(self, unmarshaller):
            # type: (JavaObjectUnmarshaller) -> None
            list.__init__(self)
            JavaObject.__init__(self)

        def __hash__(self):
            # list.__hash__ is None on Python 3 (lists are unhashable):
            # rely on the hash of JavaObject instead of calling it
            return JavaObject.__hash__(self)

        def __extra_loading__(self, unmarshaller, ident=0):
            # type: (JavaObjectUnmarshaller, int) -> None
            """
            Loads the content of the list, written with a custom
            implementation
            """
            # Lists have their content in their annotations
            # (the first annotation is skipped)
            self.extend(self.annotations[1:])

    @functools.total_ordering
    class JavaPrimitiveClass(JavaObject):
        """
        Parent of Java classes matching a primitive (Bool, Integer, Long, ...)

        String representation, hash and comparisons are those of the
        wrapped value.
        """

        def __init__(self, unmarshaller):
            JavaObject.__init__(self)
            self.value = None

        def __str__(self):
            return str(self.value)

        def __repr__(self):
            return repr(self.value)

        def __hash__(self):
            return hash(self.value)

        def __eq__(self, other):
            return self.value == other

        def __lt__(self, other):
            return self.value < other

    class JavaBool(JavaPrimitiveClass):
        """
        Bridge type for java.lang.Boolean
        """

        def __bool__(self):
            # __bool__ must return a real boolean: the value is None until
            # it has been loaded
            return bool(self.value)

        # Python 2 name of __bool__
        __nonzero__ = __bool__

    class JavaInt(JavaPrimitiveClass):
        """
        Bridge type for java.lang.Integer and java.lang.Long
        """

        def __int__(self):
            return self.value

    class JavaMap(dict, JavaObject):
        """
        Python-Java dictionary/map bridge type
        """

        def __init__(self, unmarshaller):
            # type: (JavaObjectUnmarshaller) -> None
            dict.__init__(self)
            JavaObject.__init__(self)

        def __hash__(self):
            # dict.__hash__ is None on Python 3 (dictionaries are
            # unhashable): rely on the hash of JavaObject instead
            return JavaObject.__hash__(self)

        def __extra_loading__(self, unmarshaller, ident=0):
            # type: (JavaObjectUnmarshaller, int) -> None
            """
            Loads the content of the map, written with a custom implementation
            """
            # Group annotation elements 2 by 2: the same iterator is given
            # twice to zip(), which therefore yields (key, value) pairs
            args = [iter(self.annotations[1:])] * 2
            for key, value in zip(*args):
                self[key] = value

    class JavaLinkedHashMap(JavaMap):
        """
        Bridge type for java.util.LinkedHashMap: the entries are read
        directly from the stream
        """

        def __extra_loading__(self, unmarshaller, ident=0):
            # type: (JavaObjectUnmarshaller, int) -> None
            """
            Loads the content of the map, written with a custom implementation

            :raise ValueError: Unexpected content in the stream
            """
            # Ignore the blockdata opid
            # NOTE(review): the byte is compared to a class description flag
            # (SC_BLOCK_DATA), not to a terminal code -- confirm that this is
            # the expected marker
            (opid,) = unmarshaller._readStruct(">B")
            if opid != ClassDescFlags.SC_BLOCK_DATA:
                raise ValueError("Start of block data not found")

            # Read HashMap fields
            self.buckets = unmarshaller._read_value(
                TypeCode.TYPE_INTEGER, ident
            )
            self.size = unmarshaller._read_value(TypeCode.TYPE_INTEGER, ident)

            # Read entries
            for _ in range(self.size):
                key = unmarshaller._read_and_exec_opcode()[1]
                value = unmarshaller._read_and_exec_opcode()[1]
                self[key] = value

            # Ignore the end of the blockdata
            unmarshaller._read_and_exec_opcode(
                ident, [TerminalCode.TC_ENDBLOCKDATA]
            )

            # Ignore the trailing 0
            (opid,) = unmarshaller._readStruct(">B")
            if opid != 0:
                raise ValueError("Should find 0x0, got {0:x}".format(opid))

    class JavaSet(set, JavaObject):
        """
        Python-Java set bridge type
        """

        def __init__(self, unmarshaller):
            # type: (JavaObjectUnmarshaller) -> None
            set.__init__(self)
            JavaObject.__init__(self)

        def __hash__(self):
            # set.__hash__ is None on Python 3 (sets are unhashable): rely
            # on the hash of JavaObject instead
            return JavaObject.__hash__(self)

        def __extra_loading__(self, unmarshaller, ident=0):
            # type: (JavaObjectUnmarshaller, int) -> None
            """
            Loads the content of the set, written with a custom implementation
            """
            self.update(self.annotations[1:])

    class JavaTreeSet(JavaSet):
        """
        Bridge type for java.util.TreeSet
        """

        def __extra_loading__(self, unmarshaller, ident=0):
            # type: (JavaObjectUnmarshaller, int) -> None
            """
            Loads the content of the set, written with a custom implementation
            """
            # Annotation[1] == size of the set
            self.update(self.annotations[2:])

    class JavaTime(JavaObject):
        """
        Represents the classes found in the java.time package

        The semantic of the fields depends on the type of time that has been
        parsed
        """

        DURATION_TYPE = 1
        INSTANT_TYPE = 2
        LOCAL_DATE_TYPE = 3
        LOCAL_TIME_TYPE = 4
        LOCAL_DATE_TIME_TYPE = 5
        ZONE_DATE_TIME_TYPE = 6
        ZONE_REGION_TYPE = 7
        ZONE_OFFSET_TYPE = 8
        OFFSET_TIME_TYPE = 9
        OFFSET_DATE_TIME_TYPE = 10
        YEAR_TYPE = 11
        YEAR_MONTH_TYPE = 12
        MONTH_DAY_TYPE = 13
        PERIOD_TYPE = 14

        def __init__(self, unmarshaller):
            # type: (JavaObjectUnmarshaller) -> None
            JavaObject.__init__(self)
            # -1: nothing has been parsed yet
            self.type = -1
            self.year = None
            self.month = None
            self.day = None
            self.hour = None
            self.minute = None
            self.second = None
            self.nano = None
            self.offset = None
            self.zone = None

            # Kind of time -> method parsing its content
            self.time_handlers = {
                self.DURATION_TYPE: self.do_duration,
                self.INSTANT_TYPE: self.do_instant,
                self.LOCAL_DATE_TYPE: self.do_local_date,
                self.LOCAL_DATE_TIME_TYPE: self.do_local_date_time,
                self.LOCAL_TIME_TYPE: self.do_local_time,
                self.ZONE_DATE_TIME_TYPE: self.do_zoned_date_time,
                self.ZONE_OFFSET_TYPE: self.do_zone_offset,
                self.ZONE_REGION_TYPE: self.do_zone_region,
                self.OFFSET_TIME_TYPE: self.do_offset_time,
                self.OFFSET_DATE_TIME_TYPE: self.do_offset_date_time,
                self.YEAR_TYPE: self.do_year,
                self.YEAR_MONTH_TYPE: self.do_year_month,
                self.MONTH_DAY_TYPE: self.do_month_day,
                self.PERIOD_TYPE: self.do_period,
            }

        def __str__(self):
            # The type is really printed in hexadecimal, to match its prefix
            return (
                "JavaTime(type={s.type:#x}, "
                "year={s.year}, month={s.month}, day={s.day}, "
                "hour={s.hour}, minute={s.minute}, second={s.second}, "
                "nano={s.nano}, offset={s.offset}, zone={s.zone})"
            ).format(s=self)

        def __extra_loading__(self, unmarshaller, ident=0):
            # type: (JavaObjectUnmarshaller, int) -> None
            """
            Loads the content of the time object, written with a custom
            implementation: a type byte followed by type-specific fields

            An unknown kind of time is logged as an error, without raising.
            """
            # Convert back annotations to bytes
            # latin-1 is used to ensure that bytes are kept as is
            content = to_bytes(self.annotations[0], "latin1")
            (self.type,), content = read_struct(content, ">b")

            # Only the lookup is protected: a KeyError raised by the handler
            # itself must not be reported as an unknown kind of time
            try:
                handler = self.time_handlers[self.type]
            except KeyError as ex:
                log_error("Unhandled kind of time: {}".format(ex))
            else:
                handler(unmarshaller, content)

        # Each do_* method below parses its fields from the given bytes and
        # returns the remaining ones

        def do_duration(self, unmarshaller, data):
            (self.second, self.nano), data = read_struct(data, ">qi")
            return data

        def do_instant(self, unmarshaller, data):
            (self.second, self.nano), data = read_struct(data, ">qi")
            return data

        def do_local_date(self, unmarshaller, data):
            (self.year, self.month, self.day), data = read_struct(data, ">ibb")
            return data

        def do_local_time(self, unmarshaller, data):
            # Compact form: a negative byte is the bitwise complement of the
            # last stored field, the following ones being left to 0
            (hour,), data = read_struct(data, ">b")
            minute = 0
            second = 0
            nano = 0

            if hour < 0:
                hour = ~hour
            else:
                (minute,), data = read_struct(data, ">b")
                if minute < 0:
                    minute = ~minute
                else:
                    (second,), data = read_struct(data, ">b")
                    if second < 0:
                        second = ~second
                    else:
                        (nano,), data = read_struct(data, ">i")

            self.hour = hour
            self.minute = minute
            self.second = second
            self.nano = nano
            return data

        def do_local_date_time(self, unmarshaller, data):
            data = self.do_local_date(unmarshaller, data)
            data = self.do_local_time(unmarshaller, data)
            return data

        def do_zoned_date_time(self, unmarshaller, data):
            data = self.do_local_date_time(unmarshaller, data)
            data = self.do_zone_offset(unmarshaller, data)
            data = self.do_zone_region(unmarshaller, data)
            return data

        def do_zone_offset(self, unmarshaller, data):
            # The offset is a number of quarters of hour (900 seconds),
            # except for the 127 marker, followed by the offset itself
            (offset_byte,), data = read_struct(data, ">b")
            if offset_byte == 127:
                (self.offset,), data = read_struct(data, ">i")
            else:
                self.offset = offset_byte * 900
            return data

        def do_zone_region(self, unmarshaller, data):
            self.zone, data = read_string(data)
            return data

        def do_offset_time(self, unmarshaller, data):
            data = self.do_local_time(unmarshaller, data)
            data = self.do_zone_offset(unmarshaller, data)
            return data

        def do_offset_date_time(self, unmarshaller, data):
            data = self.do_local_date_time(unmarshaller, data)
            data = self.do_zone_offset(unmarshaller, data)
            return data

        def do_year(self, unmarshaller, data):
            (self.year,), data = read_struct(data, ">i")
            return data

        def do_year_month(self, unmarshaller, data):
            (self.year, self.month), data = read_struct(data, ">ib")
            return data

        def do_month_day(self, unmarshaller, data):
            (self.month, self.day), data = read_struct(data, ">bb")
            return data

        def do_period(self, unmarshaller, data):
            (self.year, self.month, self.day), data = read_struct(data, ">iii")
            return data

    # Name of a Java class -> Python bridge type
    TYPE_MAPPER = {
        "java.util.ArrayList": JavaList,
        "java.util.LinkedList": JavaList,
        "java.util.HashMap": JavaMap,
        "java.util.LinkedHashMap": JavaLinkedHashMap,
        "java.util.TreeMap": JavaMap,
        "java.util.HashSet": JavaSet,
        "java.util.LinkedHashSet": JavaSet,
        "java.util.TreeSet": JavaTreeSet,
        "java.time.Ser": JavaTime,
        "java.lang.Boolean": JavaBool,
        "java.lang.Integer": JavaInt,
        "java.lang.Long": JavaInt,
    }  # type: Dict[str, Callable[[JavaObjectUnmarshaller], JavaObject]]

    def create(self, classdesc, unmarshaller):
        # type: (JavaClass, JavaObjectUnmarshaller) -> JavaObject
        """
        Creates the Python object which will hold a deserialized Java object

        :param classdesc: The description of a Java class
        :param unmarshaller: The unmarshaller, given to the constructor of
                             the bridge type
        :return: An instance of the bridge type matching the class, or a
                 plain JavaObject if the class isn't handled
        """
        try:
            mapped_type = self.TYPE_MAPPER[classdesc.name]
        except KeyError:
            # Return a JavaObject by default
            return JavaObject()
        else:
            log_debug("---")
            log_debug(classdesc.name)
            log_debug("---")

            java_object = mapped_type(unmarshaller)

            log_debug(">>> java_object: {0}".format(java_object))
            return java_object
# Conversion of a Java type char to its NumPy equivalent
# NOTE(review): a Java byte is signed and a Java char is 16 bits wide, which
# doesn't match the "B" and "b" codes below. They are kept as is, as changing
# them would alter the values given to existing callers -- to be confirmed.
NUMPY_TYPE_MAP = {
    TypeCode.TYPE_BYTE: "B",
    TypeCode.TYPE_CHAR: "b",
    TypeCode.TYPE_DOUBLE: ">d",
    TypeCode.TYPE_FLOAT: ">f",
    TypeCode.TYPE_INTEGER: ">i",
    # A Java long is always 64 bits wide: "q" (long long) has that size on
    # all platforms, whereas "l" is the C long, which is 32 bits on Windows
    TypeCode.TYPE_LONG: ">q",
    TypeCode.TYPE_SHORT: ">h",
    TypeCode.TYPE_BOOLEAN: ">B",
}
    def add_transformer(self, transformer):
        """
        Appends an object transformer to the deserialization process

        The transformer is added at the end of ``object_transformers``, the
        list that do_object() walks in order when it creates an object.

        NOTE(review): do_object() calls
        ``transformer.create(classdesc, unmarshaller)`` on each registered
        transformer; no ``transform(obj)`` call is visible in this class, so
        the parameter description below documents ``create``.

        :param transformer: An object with a create(classdesc, unmarshaller)
                            method
        """
        self.object_transformers.append(transformer)
" + "Invalid stream header: {0:04X}{1:04X}".format(magic, version) + ) + + def _read_and_exec_opcode(self, ident=0, expect=None): + """ + Reads the next opcode, and executes its handler + + :param ident: Log identation level + :param expect: A list of expected opcodes + :return: A tuple: (opcode, result of the handler) + :raise IOError: Read opcode is not one of the expected ones + :raise RuntimeError: Unknown opcode + """ + position = self.object_stream.tell() + (opid,) = self._readStruct(">B") + log_debug( + "OpCode: 0x{0:X} -- {1} (at offset 0x{2:X})".format( + opid, StreamCodeDebug.op_id(opid), position + ), + ident, + ) + + if expect and opid not in expect: + raise IOError( + "Unexpected opcode 0x{0:X} -- {1} " + "(at offset 0x{2:X})".format( + opid, StreamCodeDebug.op_id(opid), position + ) + ) + + try: + handler = self.opmap[opid] + except KeyError: + raise RuntimeError( + "Unknown OpCode in the stream: 0x{0:X} " + "(at offset 0x{1:X})".format(opid, position) + ) + else: + return opid, handler(ident=ident) + + def _readStruct(self, unpack): + """ + Reads from the input stream, using struct + + :param unpack: An unpack format string + :return: The result of struct.unpack (tuple) + :raise RuntimeError: End of stream reached during unpacking + """ + length = struct.calcsize(unpack) + ba = self.object_stream.read(length) + + if len(ba) != length: + raise RuntimeError( + "Stream has been ended unexpectedly while unmarshaling." 
+ ) + + return struct.unpack(unpack, ba) + + def _readString(self, length_fmt="H"): + """ + Reads a serialized string + + :param length_fmt: Structure format of the string length (H or Q) + :return: The deserialized string + :raise RuntimeError: Unexpected end of stream + """ + (length,) = self._readStruct(">{0}".format(length_fmt)) + ba = self.object_stream.read(length) + return to_unicode(ba) + + def do_classdesc(self, parent=None, ident=0): + """ + Handles a TC_CLASSDESC opcode + + :param parent: + :param ident: Log indentation level + :return: A JavaClass object + """ + # TC_CLASSDESC className serialVersionUID newHandle classDescInfo + # classDescInfo: + # classDescFlags fields classAnnotation superClassDesc + # classDescFlags: + # (byte) // Defined in Terminal Symbols and Constants + # fields: + # (short) fieldDesc[count] + + # fieldDesc: + # primitiveDesc + # objectDesc + # primitiveDesc: + # prim_typecode fieldName + # objectDesc: + # obj_typecode fieldName className1 + clazz = JavaClass() + log_debug("[classdesc]", ident) + class_name = self._readString() + clazz.name = class_name + log_debug("Class name: %s" % class_name, ident) + + # serialVersionUID is a Java (signed) long => 8 bytes + serialVersionUID, classDescFlags = self._readStruct(">qB") + clazz.serialVersionUID = serialVersionUID + clazz.flags = classDescFlags + + self._add_reference(clazz, ident) + + log_debug( + "Serial: 0x{0:X} / {0:d} - classDescFlags: 0x{1:X} {2}".format( + serialVersionUID, + classDescFlags, + StreamCodeDebug.flags(classDescFlags), + ), + ident, + ) + (length,) = self._readStruct(">H") + log_debug("Fields num: 0x{0:X}".format(length), ident) + + clazz.fields_names = [] + clazz.fields_types = [] + for fieldId in range(length): + (typecode,) = self._readStruct(">B") + field_name = self._readString() + base_field_type = self._convert_char_to_type(typecode) + + log_debug("> Reading field {0}".format(field_name), ident) + + if base_field_type == TypeCode.TYPE_ARRAY: + _, 
field_type = self._read_and_exec_opcode( + ident=ident + 1, + expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE), + ) + + if type(field_type) is not JavaString: # pylint:disable=C0123 + raise AssertionError( + "Field type must be a JavaString, " + "not {0}".format(type(field_type)) + ) + + elif base_field_type == TypeCode.TYPE_OBJECT: + _, field_type = self._read_and_exec_opcode( + ident=ident + 1, + expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE), + ) + + if isinstance(field_type, JavaClass): + # FIXME: ugly trick + field_type = JavaString(field_type.name) + + if type(field_type) is not JavaString: # pylint:disable=C0123 + raise AssertionError( + "Field type must be a JavaString, " + "not {0}".format(type(field_type)) + ) + else: + # Convert the TypeCode to its char value + field_type = JavaString(str(chr(base_field_type.value))) + + log_debug( + "< FieldName: 0x{0:X} Name:{1} Type:{2} ID:{3}".format( + typecode, field_name, field_type, fieldId + ), + ident, + ) + assert field_name is not None + assert field_type is not None + + clazz.fields_names.append(field_name) + clazz.fields_types.append(field_type) + + if parent: + parent.__fields = clazz.fields_names # pylint:disable=W0212 + parent.__types = clazz.fields_types # pylint:disable=W0212 + + # classAnnotation + (opid,) = self._readStruct(">B") + log_debug( + "OpCode: 0x{0:X} -- {1} (classAnnotation)".format( + opid, StreamCodeDebug.op_id(opid) + ), + ident, + ) + if opid != TerminalCode.TC_ENDBLOCKDATA: + raise NotImplementedError("classAnnotation isn't implemented yet") + + # superClassDesc + log_debug("Reading Super Class of {0}".format(clazz.name), ident) + _, superclassdesc = self._read_and_exec_opcode( + ident=ident + 1, + expect=( + TerminalCode.TC_CLASSDESC, + TerminalCode.TC_NULL, + TerminalCode.TC_REFERENCE, + ), + ) + log_debug( + "Super Class for {0}: {1}".format(clazz.name, str(superclassdesc)), + ident, + ) + clazz.superclass = superclassdesc + return clazz + + def 
do_blockdata(self, parent=None, ident=0): + """ + Handles TC_BLOCKDATA opcode + + :param parent: + :param ident: Log indentation level + :return: A string containing the block data + """ + # TC_BLOCKDATA (unsigned byte) (byte)[size] + log_debug("[blockdata]", ident) + (length,) = self._readStruct(">B") + ba = self.object_stream.read(length) + + # Ensure we have an str + return read_to_str(ba) + + def do_blockdata_long(self, parent=None, ident=0): + """ + Handles TC_BLOCKDATALONG opcode + + :param parent: + :param ident: Log indentation level + :return: A string containing the block data + """ + # TC_BLOCKDATALONG (int) (byte)[size] + log_debug("[blockdatalong]", ident) + (length,) = self._readStruct(">I") + ba = self.object_stream.read(length) + + # Ensure we have an str + return read_to_str(ba) + + def do_class(self, parent=None, ident=0): + """ + Handles TC_CLASS opcode + + :param parent: + :param ident: Log indentation level + :return: A JavaClass object + """ + # TC_CLASS classDesc newHandle + log_debug("[class]", ident) + + # TODO: what to do with "(ClassDesc)prevObject". + # (see 3rd line for classDesc:) + _, classdesc = self._read_and_exec_opcode( + ident=ident + 1, + expect=( + TerminalCode.TC_CLASSDESC, + TerminalCode.TC_PROXYCLASSDESC, + TerminalCode.TC_NULL, + TerminalCode.TC_REFERENCE, + ), + ) + log_debug("Classdesc: {0}".format(classdesc), ident) + self._add_reference(classdesc, ident) + return classdesc + + def do_object(self, parent=None, ident=0): + """ + Handles a TC_OBJECT opcode + + :param parent: + :param ident: Log indentation level + :return: A JavaClass object + """ + # TC_OBJECT classDesc newHandle classdata[] // data for each class + java_object = JavaObject() + log_debug("[object]", ident) + log_debug( + "java_object.annotations just after instantiation: {0}".format( + java_object.annotations + ), + ident, + ) + + # TODO: what to do with "(ClassDesc)prevObject". 
+ # (see 3rd line for classDesc:) + opcode, classdesc = self._read_and_exec_opcode( + ident=ident + 1, + expect=( + TerminalCode.TC_CLASSDESC, + TerminalCode.TC_PROXYCLASSDESC, + TerminalCode.TC_NULL, + TerminalCode.TC_REFERENCE, + ), + ) + # self.TC_REFERENCE hasn't shown in spec, but actually is here + + # Create object + for transformer in self.object_transformers: + java_object = transformer.create(classdesc, self) + if java_object is not None: + break + + # Store classdesc of this object + java_object.classdesc = classdesc + + # Store the reference + self._add_reference(java_object, ident) + + # classdata[] + + if ( + classdesc.flags & ClassDescFlags.SC_EXTERNALIZABLE + and not classdesc.flags & ClassDescFlags.SC_BLOCK_DATA + ): + # TODO: + raise NotImplementedError("externalContents isn't implemented yet") + + if classdesc.flags & ClassDescFlags.SC_SERIALIZABLE: + # TODO: look at ObjectInputStream.readSerialData() + # FIXME: Handle the SC_WRITE_METHOD flag + + # create megalist + tempclass = classdesc + megalist = [] + megatypes = [] + log_debug("Constructing class...", ident) + while tempclass: + log_debug("Class: {0}".format(tempclass.name), ident + 1) + class_fields_str = " - ".join( + " ".join((str(field_type), field_name)) + for field_type, field_name in zip( + tempclass.fields_types, tempclass.fields_names + ) + ) + if class_fields_str: + log_debug(class_fields_str, ident + 2) + + fieldscopy = tempclass.fields_names[:] + fieldscopy.extend(megalist) + megalist = fieldscopy + + fieldscopy = tempclass.fields_types[:] + fieldscopy.extend(megatypes) + megatypes = fieldscopy + + tempclass = tempclass.superclass + + log_debug("Values count: {0}".format(len(megalist)), ident) + log_debug("Prepared list of values: {0}".format(megalist), ident) + log_debug("Prepared list of types: {0}".format(megatypes), ident) + + for field_name, field_type in zip(megalist, megatypes): + log_debug( + "Reading field: {0} - {1}".format(field_type, field_name) + ) + res = 
self._read_value(field_type, ident, name=field_name) + java_object.__setattr__(field_name, res) + + if ( + classdesc.flags & ClassDescFlags.SC_SERIALIZABLE + and classdesc.flags & ClassDescFlags.SC_WRITE_METHOD + or classdesc.flags & ClassDescFlags.SC_EXTERNALIZABLE + and classdesc.flags & ClassDescFlags.SC_BLOCK_DATA + or classdesc.superclass is not None + and classdesc.superclass.flags & ClassDescFlags.SC_SERIALIZABLE + and classdesc.superclass.flags & ClassDescFlags.SC_WRITE_METHOD + ): + # objectAnnotation + log_debug( + "java_object.annotations before: {0}".format( + java_object.annotations + ), + ident, + ) + + while opcode != TerminalCode.TC_ENDBLOCKDATA: + opcode, obj = self._read_and_exec_opcode(ident=ident + 1) + # , expect=[self.TC_ENDBLOCKDATA, self.TC_BLOCKDATA, + # self.TC_OBJECT, self.TC_NULL, self.TC_REFERENCE]) + if opcode != TerminalCode.TC_ENDBLOCKDATA: + java_object.annotations.append(obj) + + log_debug("objectAnnotation value: {0}".format(obj), ident) + + log_debug( + "java_object.annotations after: {0}".format( + java_object.annotations + ), + ident, + ) + + # Allow extra loading operations + if hasattr(java_object, "__extra_loading__"): + log_debug("Java object has extra loading capability.") + java_object.__extra_loading__(self, ident) + + log_debug(">>> java_object: {0}".format(java_object), ident) + return java_object + + def do_string(self, parent=None, ident=0): + """ + Handles a TC_STRING opcode + + :param parent: + :param ident: Log indentation level + :return: A string + """ + log_debug("[string]", ident) + ba = JavaString(self._readString()) + self._add_reference(ba, ident) + return ba + + def do_string_long(self, parent=None, ident=0): + """ + Handles a TC_LONGSTRING opcode + + :param parent: + :param ident: Log indentation level + :return: A string + """ + log_debug("[long string]", ident) + ba = JavaString(self._readString("Q")) + self._add_reference(ba, ident) + return ba + + def do_array(self, parent=None, ident=0): + """ + 
Handles a TC_ARRAY opcode + + :param parent: + :param ident: Log indentation level + :return: A list of deserialized objects + """ + # TC_ARRAY classDesc newHandle (int) values[size] + log_debug("[array]", ident) + _, classdesc = self._read_and_exec_opcode( + ident=ident + 1, + expect=( + TerminalCode.TC_CLASSDESC, + TerminalCode.TC_PROXYCLASSDESC, + TerminalCode.TC_NULL, + TerminalCode.TC_REFERENCE, + ), + ) + + array = JavaArray(classdesc) + + self._add_reference(array, ident) + + (size,) = self._readStruct(">i") + log_debug("size: {0}".format(size), ident) + + array_type_code = TypeCode(ord(classdesc.name[0])) + assert array_type_code == TypeCode.TYPE_ARRAY + type_code = TypeCode(ord(classdesc.name[1])) + + if type_code in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY): + for _ in range(size): + _, res = self._read_and_exec_opcode(ident=ident + 1) + log_debug("Object value: {0}".format(res), ident) + array.append(res) + elif type_code == TypeCode.TYPE_BYTE: + array = JavaByteArray(self.object_stream.read(size), classdesc) + elif self.use_numpy_arrays and numpy is not None: + array = numpy.fromfile( + self.object_stream, + dtype=NUMPY_TYPE_MAP[type_code], + count=size, + ) + else: + for _ in range(size): + res = self._read_value(type_code, ident) + log_debug("Native value: {0}".format(repr(res)), ident) + array.append(res) + + return array + + def do_reference(self, parent=None, ident=0): + """ + Handles a TC_REFERENCE opcode + + :param parent: + :param ident: Log indentation level + :return: The referenced object + """ + (handle,) = self._readStruct(">L") + log_debug("## Reference handle: 0x{0:X}".format(handle), ident) + ref = self.references[handle - StreamConstants.BASE_REFERENCE_IDX] + log_debug("###-> Type: {0} - Value: {1}".format(type(ref), ref), ident) + return ref + + @staticmethod + def do_null(parent=None, ident=0): + """ + Handles a TC_NULL opcode + + :param parent: + :param ident: Log indentation level + :return: Always None + """ + return None + + def 
do_enum(self, parent=None, ident=0): + """ + Handles a TC_ENUM opcode + + :param parent: + :param ident: Log indentation level + :return: A JavaEnum object + """ + # TC_ENUM classDesc newHandle enumConstantName + enum = JavaEnum() + _, classdesc = self._read_and_exec_opcode( + ident=ident + 1, + expect=( + TerminalCode.TC_CLASSDESC, + TerminalCode.TC_PROXYCLASSDESC, + TerminalCode.TC_NULL, + TerminalCode.TC_REFERENCE, + ), + ) + enum.classdesc = classdesc + self._add_reference(enum, ident) + ( + _, + enumConstantName, + ) = self._read_and_exec_opcode( # pylint:disable=C0103 + ident=ident + 1, + expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE), + ) + enum.constant = enumConstantName + return enum + + def _read_value(self, raw_field_type, ident, name=""): + # type: (Union[bytes, int, TypeCode], int, str) -> Any + """ + Reads the next value, of the given type + + :param raw_field_type: A serialization typecode + :param ident: Log indentation + :param name: Field name (for logs) + :return: The read value + :raise RuntimeError: Unknown field type + """ + if isinstance(raw_field_type, TypeCode): + field_type = raw_field_type + elif isinstance(raw_field_type, int): + field_type = TypeCode(raw_field_type) + else: + # We don't need details for arrays and objects + raw_code = raw_field_type[0] + if isinstance(raw_code, int): + field_type = TypeCode(raw_code) + else: + field_type = TypeCode(ord(raw_code)) + + if field_type == TypeCode.TYPE_BOOLEAN: + (val,) = self._readStruct(">B") + res = bool(val) # type: Any + elif field_type == TypeCode.TYPE_BYTE: + (res,) = self._readStruct(">b") + elif field_type == TypeCode.TYPE_CHAR: + # TYPE_CHAR is defined by the serialization specification + # but not used in the implementation, so this is + # a hypothetical code + res = unicode_char(self._readStruct(">H")[0]) + elif field_type == TypeCode.TYPE_SHORT: + (res,) = self._readStruct(">h") + elif field_type == TypeCode.TYPE_INTEGER: + (res,) = self._readStruct(">i") + elif 
field_type == TypeCode.TYPE_LONG: + (res,) = self._readStruct(">q") + elif field_type == TypeCode.TYPE_FLOAT: + (res,) = self._readStruct(">f") + elif field_type == TypeCode.TYPE_DOUBLE: + (res,) = self._readStruct(">d") + elif field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY): + _, res = self._read_and_exec_opcode(ident=ident + 1) + else: + raise RuntimeError("Unknown typecode: {0}".format(field_type)) + + log_debug( + "* {0} {1}: {2}".format(chr(field_type.value), name, repr(res)), + ident, + ) + return res + + @staticmethod + def _convert_char_to_type(type_char): + # type: (Any) -> TypeCode + """ + Ensures a read character is a typecode. + + :param type_char: Read typecode + :return: The typecode as an integer (using ord) + :raise RuntimeError: Unknown typecode + """ + typecode = type_char + if not isinstance(type_char, int): + typecode = ord(type_char) + + try: + return TypeCode(typecode) + except ValueError: + raise RuntimeError( + "Typecode {0} ({1}) isn't supported.".format( + type_char, typecode + ) + ) + + def _add_reference(self, obj, ident=0): + """ + Adds a read reference to the marshaler storage + + :param obj: Reference to add + :param ident: Log indentation level + """ + log_debug( + "## New reference handle 0x{0:X}: {1} -> {2}".format( + len(self.references) + StreamConstants.BASE_REFERENCE_IDX, + type(obj).__name__, + repr(obj), + ), + ident, + ) + self.references.append(obj) + + def _oops_dump_state(self, ignore_remaining_data=False): + """ + Log a deserialization error + + :param ignore_remaining_data: If True, don't log an error when + unused trailing bytes are remaining + """ + log_error("==Oops state dump" + "=" * (30 - 17)) + log_error("References: {0}".format(self.references)) + log_error( + "Stream seeking back at -16 byte " + "(2nd line is an actual position!):" + ) + + # Do not use a keyword argument + self.object_stream.seek(-16, os.SEEK_CUR) + position = self.object_stream.tell() + the_rest = self.object_stream.read() + + if not 
ignore_remaining_data and len(the_rest) != 0: + log_error( + "Warning!!!!: Stream still has {0} bytes left:\n{1}".format( + len(the_rest), hexdump(the_rest, position) + ) + ) + + log_error("=" * 30) diff --git a/javaobj/v2/__init__.py b/javaobj/v2/__init__.py new file mode 100644 index 0000000..e9745ea --- /dev/null +++ b/javaobj/v2/__init__.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Rewritten version of the un-marshalling process of javaobj. + +The previous process had issues in some cases that + +This package is based on the approach of the jdeserialize project (in Java) +See: https://github.com/frohoff/jdeserialize + +The object transformer concept of javaobj has been adapted to work with this +approach. + +This package should handle more files than before, in read-only mode. +The writing mode should be handled by the "classic" javaobj code. + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from . 
import api, beans, core, main, stream, transformers # noqa: 401 +from .main import load, loads # noqa: 401 + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" diff --git a/javaobj/v2/api.py b/javaobj/v2/api.py new file mode 100644 index 0000000..8d9cd0d --- /dev/null +++ b/javaobj/v2/api.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Definition of the object transformer API + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
class IJavaStreamParser:
    """
    API of the Java stream parser

    Every method of this class is a stub raising NotImplementedError: the
    concrete parser is expected to override them.
    """

    def run(self):
        # type: () -> List[ParsedJavaContent]
        """
        Parses the input stream

        :return: The list of parsed contents
        :raise NotImplementedError: Method not overridden
        """
        raise NotImplementedError

    def dump(self, content):
        # type: (List[ParsedJavaContent]) -> str
        """
        Dumps to a string the given objects

        :param content: The parsed contents to dump
        :return: The dump, as a string
        :raise NotImplementedError: Method not overridden
        """
        raise NotImplementedError

    def _read_content(self, type_code, block_data, class_desc=None):
        # type: (int, bool, Optional[JavaClassDesc]) -> ParsedJavaContent
        """
        Parses the next content. Use with care (use only in a transformer)

        :param type_code: Type code of the content to parse
        :param block_data: Block data flag
        :param class_desc: An optional class description
        :return: The parsed content
        :raise NotImplementedError: Method not overridden
        """
        # Like the other stubs: fail loudly instead of returning None where
        # a parsed content is expected
        raise NotImplementedError
+ + The result of this method must be the content of the array, i.e. a list + or an array. It will be stored in a JavaArray bean created by the + parser. + + This method must return None if it can't handle the array. + + :param reader: The data stream reader + :param type_code: Type of the elements of the array + :param size: Number of elements in the array + """ + return None + + def load_custom_writeObject( + self, parser, reader, name + ): # pylint:disable=W0613,R0201 + # type: (IJavaStreamParser, DataStreamReader, str) -> Optional[JavaClassDesc] + """ + Reads content stored from a custom writeObject. + + This method is called only if the class description has both the + ``SC_SERIALIZABLE`` and ``SC_WRITE_METHOD`` flags set. + + The stream parsing will stop and fail if this method returns None. + + :param parser: The JavaStreamParser in use + :param reader: The data stream reader + :param name: The class description name + :return: A Java class description, if handled, else None + """ + return None diff --git a/javaobj/v2/beans.py b/javaobj/v2/beans.py new file mode 100644 index 0000000..0b81f16 --- /dev/null +++ b/javaobj/v2/beans.py @@ -0,0 +1,641 @@ +#!/usr/bin/env python3 +""" +Definition of the beans used to represent the parsed objects + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
class ParsedJavaContent(object):  # pylint:disable=R205
    """
    Base class of all the contents parsed from the stream
    """

    def __init__(self, content_type):
        # type: (ContentType) -> None
        # Handle of the content (0 until one is assigned)
        self.handle = 0  # type: int
        # Flag set to False at construction
        self.is_exception = False  # type: bool
        # Kind of content
        self.type = content_type  # type: ContentType

    def __str__(self):
        template = "[ParseJavaObject 0x{0:x} - {1}]"
        return template.format(self.handle, self.type)

    # The representation is the one of this class, even in sub classes
    __repr__ = __str__

    def dump(self, indent=0):
        # type: (int) -> str
        """
        Returns the string form of this content, prefixed with one
        tabulation per indentation level
        """
        prefix = "\t" * indent
        return prefix + str(self)

    def validate(self):
        """
        Validity check on the object: nothing to check at this level
        """
        return None
Optional[JavaString]) -> None + self.type = field_type + self.name = name + self.class_name = class_name + self.is_inner_class_reference = False + + if self.class_name: + self.validate(self.class_name.value) + + def validate(self, java_type): + # type: (str) -> None + """ + Validates the type given as parameter + """ + if self.type == FieldType.OBJECT: + if not java_type: + raise ValueError("Class name can't be empty") + + if java_type[0] != "L" or java_type[-1] != ";": + raise ValueError( + "Invalid object field type: {0}".format(java_type) + ) + + +class JavaClassDesc(ParsedJavaContent): + """ + Represents the description of a class + """ + + def __init__(self, class_desc_type): + # type: (ClassDescType) -> None + super(JavaClassDesc, self).__init__(ContentType.CLASSDESC) + + # Type of class description + self.class_type = class_desc_type # type: ClassDescType + + # Class name + self.name = None # type: Optional[str] + + # Serial version UID + self.serial_version_uid = 0 # type: int + + # Description flags byte + self.desc_flags = 0 # type: int + + # Fields in the class + self.fields = [] # type: List[JavaField] + + # Inner classes + self.inner_classes = [] # type: List[JavaClassDesc] + + # List of annotations objects + self.annotations = [] # type: List[ParsedJavaContent] + + # The super class of this one, if any + self.super_class = None # type: Optional[JavaClassDesc] + + # Indicates if it is a super class + self.is_super_class = False + + # List of the interfaces of the class + self.interfaces = [] # type: List[str] + + # Set of enum constants + self.enum_constants = set() # type: Set[str] + + # Flag to indicate if this is an inner class + self.is_inner_class = False # type: bool + + # Flag to indicate if this is a local inner class + self.is_local_inner_class = False # type: bool + + # Flag to indicate if this is a static member class + self.is_static_member_class = False # type: bool + + def __str__(self): + return "[classdesc 0x{0:x}: name {1}, uid 
{2}]".format( + self.handle, self.name, self.serial_version_uid + ) + + __repr__ = __str__ + + def dump(self, indent=0): + # type: (int) -> str + """ + Returns a dump representation of the exception + """ + return "\t" * indent + "[classdesc 0x{0:x}: name {1}, uid {2}]".format( + self.handle, self.name, self.serial_version_uid + ) + + @property + def serialVersionUID(self): # pylint:disable=C0103 + """ + Mimics the javaobj API + """ + return self.serial_version_uid + + @property + def flags(self): + """ + Mimics the javaobj API + """ + return self.desc_flags + + @property + def fields_names(self): + """ + Mimics the javaobj API + """ + return [field.name for field in self.fields] + + @property + def fields_types(self): + """ + Mimics the javaobj API + """ + return [field.type for field in self.fields] + + @property + def data_type(self): + """ + Computes the data type of this class (Write, No Write, Annotation) + """ + if ClassDescFlags.SC_SERIALIZABLE & self.desc_flags: + return ( + ClassDataType.WRCLASS + if (ClassDescFlags.SC_WRITE_METHOD & self.desc_flags) + else ClassDataType.NOWRCLASS + ) + + if ClassDescFlags.SC_EXTERNALIZABLE & self.desc_flags: + return ( + ClassDataType.OBJECT_ANNOTATION + if (ClassDescFlags.SC_WRITE_METHOD & self.desc_flags) + else ClassDataType.EXTERNAL_CONTENTS + ) + + raise ValueError("Unhandled Class Data Type") + + def is_array_class(self): + # type: () -> bool + """ + Determines if this is an array type + """ + return self.name.startswith("[") if self.name else False + + def get_hierarchy(self, classes): + # type: (List["JavaClassDesc"]) -> None + """ + Generates a list of class descriptions in this class's hierarchy, in + the order described by the Object Stream Serialization Protocol. + This is the order in which fields are read from the stream. 
def validate(self):
    # type: () -> None
    """
    Checks the consistency between the flags, the fields, the interfaces
    and the enum constants of this class description

    :raise ValueError: The description mixes incompatible properties
    """
    both_kinds = (
        ClassDescFlags.SC_SERIALIZABLE | ClassDescFlags.SC_EXTERNALIZABLE
    )
    kind_flags = self.desc_flags & both_kinds

    # Fields are only accepted on serializable or externalizable classes
    if kind_flags == 0 and self.fields:
        raise ValueError(
            "Non-serializable, non-externalizable class has fields"
        )

    # ... and a class can't be both at the same time
    if kind_flags == both_kinds:
        raise ValueError("Class is both serializable and externalizable")

    is_enum = bool(self.desc_flags & ClassDescFlags.SC_ENUM)
    if is_enum and (self.fields or self.interfaces):
        raise ValueError(
            "Enums shouldn't implement interfaces "
            "or have non-constant fields"
        )

    if not is_enum and self.enum_constants:
        raise ValueError("Non-enum classes shouldn't have enum constants")
def __getattr__(self, name):
    # type: (str) -> Any
    """
    Returns the value of the parsed Java field with the given name

    Python only calls this method when the regular attribute lookup has
    failed. The field values of all the classes of the hierarchy are then
    searched, and the first field with a matching name wins.

    :param name: Name of the Java field
    :return: The value parsed for that field
    :raise AttributeError: No field with that name
    """
    # Read the storage straight from the instance dictionary: going through
    # ``self.field_data`` would call this method again, without end, when
    # the attribute doesn't exist yet (e.g. on an object rebuilt by copy or
    # pickle, which don't call __init__)
    try:
        field_data = self.__dict__["field_data"]
    except KeyError:
        raise AttributeError(name)

    for cd_fields in field_data.values():
        for field, value in cd_fields.items():
            if field.name == name:
                return value

    raise AttributeError(name)
class JavaEnum(ParsedJavaContent):
    """
    Represents a constant of a Java enumeration
    """

    def __init__(self, handle, class_desc, value):
        # type: (int, JavaClassDesc, JavaString) -> None
        """
        :param handle: Handle associated to this enum object
        :param class_desc: Description of the class of the enumeration
        :param value: The string read as the value of the enum constant
        """
        super(JavaEnum, self).__init__(ContentType.ENUM)
        self.value = value
        self.classdesc = class_desc
        self.handle = handle

    def __str__(self):
        template = "[Enum 0x{0:x}: {1}]"
        return template.format(self.handle, self.value)

    # repr() and str() share the same representation
    __repr__ = __str__

    @property
    def constant(self):
        """
        Alias of ``value``, mimics the javaobj API
        """
        return self.value
class BlockData(ParsedJavaContent):
    """
    Represents a data block

    A block can be compared to a string, to bytes or to another block: the
    comparison is made on the numeric value of each byte/character.
    """

    def __init__(self, data):
        # type: (bytes) -> None
        """
        :param data: The raw content of the block
        """
        super(BlockData, self).__init__(ContentType.BLOCKDATA)
        self.data = data

    def __str__(self):
        return "[blockdata 0x{0:x}: {1} bytes]".format(
            self.handle, len(self.data)
        )

    def __repr__(self):
        return repr(self.data)

    def __eq__(self, other):
        """
        Compares the content of this block with a string, bytes or the
        content of another block

        :param other: The object to compare to
        :return: True if both hold the same sequence of byte values,
                 False if they differ or if the types can't be compared
        """
        if isinstance(other, BlockData):
            # Compare block contents: without this, a block would not even
            # be equal to itself
            other = other.data

        if isinstance(other, (str, UNICODE_TYPE)):
            other_data = tuple(ord(x) for x in other)
        elif isinstance(other, bytes):
            other_data = tuple(byte_to_int(x) for x in other)
        else:
            # Can't compare
            return False

        return other_data == tuple(byte_to_int(x) for x in self.data)

    def __ne__(self, other):
        """
        Opposite of ``__eq__``. Python 2 doesn't derive it automatically.
        """
        return not self.__eq__(other)
def __init__(self, fd, transformers):
    # type: (IO[bytes], List[api.ObjectTransformer]) -> None
    """
    Sets up the parser

    :param fd: File-object to read from
    :param transformers: Custom object transformers
    """
    # Logger
    self._log = logging.getLogger("javaobj.parser")

    # Input stream: raw file object and its Java-like reader
    self.__fd = fd
    self.__reader = DataStreamReader(fd)

    # Private copy of the list of object transformers
    self.__transformers = list(transformers)

    # Handles currently known, and snapshots of the previous handle maps
    self.__handles = {}  # type: Dict[int, ParsedJavaContent]
    self.__handle_maps = []  # type: List[Dict[int, ParsedJavaContent]]

    # Value of the next handle to give
    self.__current_handle = StreamConstants.BASE_REFERENCE_IDX.value

    # Parsing method of each type code (it is given the type code)
    handlers = {
        TerminalCode.TC_OBJECT: self._do_object,
        TerminalCode.TC_CLASS: self._do_class,
        TerminalCode.TC_ARRAY: self._do_array,
        TerminalCode.TC_STRING: self._read_new_string,
        TerminalCode.TC_LONGSTRING: self._read_new_string,
        TerminalCode.TC_ENUM: self._do_enum,
        TerminalCode.TC_CLASSDESC: self._do_classdesc,
        TerminalCode.TC_PROXYCLASSDESC: self._do_classdesc,
        TerminalCode.TC_REFERENCE: self._do_reference,
        TerminalCode.TC_NULL: self._do_null,
        TerminalCode.TC_EXCEPTION: self._do_exception,
        TerminalCode.TC_BLOCKDATA: self._do_block_data,
        TerminalCode.TC_BLOCKDATALONG: self._do_block_data,
    }  # type: Dict[int, Callable[[int], ParsedJavaContent]]
    self.__type_code_handlers = handlers
@staticmethod
def _dump_instance(instance):
    # type: (JavaInstance) -> List[str]
    """
    Dumps an instance to a list of lines

    The annotations and the field values are grouped by the class
    description they belong to. A field referencing a parsed object is
    shown as ``this`` when it points back to the instance itself, or as the
    handle of the referenced object otherwise.

    :param instance: The instance to dump
    :return: The lines describing the instance
    """
    lines = [
        "[instance 0x{0:x}: 0x{1:x} / {2}".format(
            instance.handle,
            instance.classdesc.handle,
            instance.classdesc.name,
        )
    ]  # type: List[str]

    if instance.annotations:
        lines.append("\tobject annotations:")
        for cd, annotation in instance.annotations.items():
            lines.append("\t" + (cd.name or "null"))
            for c in annotation:
                lines.append("\t\t" + str(c))

    if instance.field_data:
        lines.append("\tfield data:")
        # field_data associates each class description of the hierarchy to
        # its own {field: value} dictionary: both levels must be walked
        for cd, fields in instance.field_data.items():
            lines.append("\t\t" + (cd.name or "null"))
            for field, obj in fields.items():
                line = "\t\t\t" + (field.name or "null") + ": "
                if isinstance(obj, ParsedJavaContent):
                    if obj.handle == instance.handle:
                        line += "this"
                    else:
                        line += "r0x{0:x}".format(obj.handle)

                    line += ": " + str(obj)
                else:
                    line += str(obj)

                lines.append(line)

    lines.append("]")
    return lines
def _read_classdesc(self):
    # type: () -> JavaClassDesc
    """
    Reads the type code found at the current position of the stream, then
    parses the class description it introduces

    :return: The result of the class description parsing
    """
    return self._do_classdesc(self.__reader.read_byte())
def _custom_readObject(self, class_name):
    # type: (str) -> ParsedJavaContent
    """
    Reads an object with a custom serialization process

    Each transformer is asked, in registration order, to load the object:
    the first one returning a result wins.

    :param class_name: Name of the class to load
    :return: The parsed object
    :raise ValueError: Unknown kind of class
    """
    # Step one byte back, presumably to let the transformers read again the
    # byte that the caller consumed as a type code
    self.__fd.seek(-1, os.SEEK_CUR)
    for transformer in self.__transformers:
        class_data = transformer.load_custom_writeObject(
            self, self.__reader, class_name
        )
        # Only None (and False, to stay tolerant) mean "not handled":
        # a result based on an empty list or dictionary is falsy but valid
        if class_data is None or class_data is False:
            continue

        return class_data

    raise ValueError("Custom readObject can not be processed")
def _read_field_value(self, field_type):
    # type: (FieldType) -> Any
    """
    Reads the value of an instance field

    Primitive values are read directly from the stream. Objects and arrays
    start with a type code, which selects the parsing method to use.

    :param field_type: Type of the field to read
    :return: The value of the field (None for a null array)
    :raise ValueError: Unknown field type, or invalid type code for an array
    :raise ExceptionRead: The content read is an exception object
    """
    if field_type == FieldType.BYTE:
        return self.__reader.read_byte()
    if field_type == FieldType.CHAR:
        return self.__reader.read_char()
    if field_type == FieldType.DOUBLE:
        return self.__reader.read_double()
    if field_type == FieldType.FLOAT:
        return self.__reader.read_float()
    if field_type == FieldType.INTEGER:
        return self.__reader.read_int()
    if field_type == FieldType.LONG:
        return self.__reader.read_long()
    if field_type == FieldType.SHORT:
        return self.__reader.read_short()
    if field_type == FieldType.BOOLEAN:
        return self.__reader.read_bool()
    if field_type in (FieldType.OBJECT, FieldType.ARRAY):
        sub_type_code = self.__reader.read_byte()
        if field_type == FieldType.ARRAY:
            if sub_type_code == TerminalCode.TC_NULL:
                # Seems required, according to issue #46
                return None
            if sub_type_code == TerminalCode.TC_REFERENCE:
                # Back-reference to an object read earlier: return it as
                # stored. It must not go through the class description
                # parser, which rejects any referenced object that isn't a
                # class description (an array never is)
                return self._do_reference(sub_type_code)
            if sub_type_code != TerminalCode.TC_ARRAY:
                raise ValueError(
                    "Array type listed, but type code != TC_ARRAY"
                )

        content = self._read_content(sub_type_code, False)
        if content is not None and content.is_exception:
            raise ExceptionRead(content)

        return content

    raise ValueError("Can't process type: {0}".format(field_type))
def _do_array(self, type_code):
    # type: (int) -> JavaArray
    """
    Parses an array

    The content is loaded by the first transformer able to handle it, or
    value by value if no transformer did.

    :param type_code: The type code which led here (unused)
    :return: The parsed array
    :raise ValueError: Missing or invalid class description, invalid size
    """
    cd = self._read_classdesc()
    if cd is None:
        raise ValueError("Array class description can't be null")

    handle = self._new_handle()
    if not cd.name or len(cd.name) < 2:
        raise ValueError("Invalid name in array class description")

    # Type of the items: second character of the class name
    content_type_byte = ord(cd.name[1].encode("latin1"))
    field_type = FieldType(content_type_byte)

    # Array size
    size = self.__reader.read_int()
    if size < 0:
        raise ValueError("Invalid array size")

    # Array content
    for transformer in self.__transformers:
        content = transformer.load_array(
            self.__reader, field_type.type_code(), size
        )
        if content is not None:
            break
    else:
        # No transformer took care of the array
        content = [self._read_field_value(field_type) for _ in range(size)]

    array = JavaArray(handle, cd, field_type, content)

    # Store the array: like any new object of the stream, it can be the
    # target of a later reference
    self._set_handle(handle, array)
    return array
def _do_block_data(self, type_code):
    # type: (int) -> BlockData
    """
    Reads a block data

    :param type_code: ``TC_BLOCKDATA`` (size stored on an unsigned byte) or
                      ``TC_BLOCKDATALONG`` (size stored on an integer)
    :return: The block read from the stream
    :raise ValueError: Invalid type code or negative block size
    """
    # Select the way the size is stored
    if type_code == TerminalCode.TC_BLOCKDATA:
        read_size = self.__reader.read_ubyte
    elif type_code == TerminalCode.TC_BLOCKDATALONG:
        read_size = self.__reader.read_int
    else:
        raise ValueError("Invalid type code for blockdata")

    size = read_size()
    if size < 0:
        raise ValueError("Invalid value for block data size")

    # Read the block
    return BlockData(self.__fd.read(size))
def loads(data, *transformers, **kwargs):
    # type: (bytes, ObjectTransformer, Any) -> Any
    """
    Deserializes Java objects and primitive data serialized using
    ObjectOutputStream from bytes.

    The data is wrapped in an in-memory stream and handed to :func:`load`.

    :param data: A Java data string
    :param transformers: Custom transformers to use
    :param kwargs: Options given as is to :func:`load`
    :return: The deserialized object
    """
    stream = BytesIO(data)
    return load(stream, *transformers, **kwargs)
class DataStreamReader(object):
    """
    Reads the given file object with object input stream-like methods

    All values are read in big-endian order.
    """

    def __init__(self, fd):
        # type: (IO[bytes]) -> None
        """
        :param fd: The input stream
        """
        self.__fd = fd

    @property
    def file_descriptor(self):
        # type: () -> IO[bytes]
        """
        The underlying file descriptor
        """
        return self.__fd

    def read(self, struct_format):
        # type: (str) -> Tuple[Any, ...]
        """
        Reads from the input stream, using struct

        :param struct_format: An unpack format string
        :return: The result of struct.unpack (tuple)
        :raise EOFError: End of stream reached during unpacking
        """
        length = struct.calcsize(struct_format)
        bytes_array = self.__fd.read(length)

        if len(bytes_array) != length:
            raise EOFError("Stream has ended unexpectedly while parsing.")

        return struct.unpack(struct_format, bytes_array)

    def read_bool(self):
        # type: () -> bool
        """
        Shortcut to read a single `boolean` (1 byte)
        """
        return bool(self.read(">B")[0])

    def read_byte(self):
        # type: () -> int
        """
        Shortcut to read a single `byte` (1 byte)
        """
        return self.read(">b")[0]

    def read_ubyte(self):
        # type: () -> int
        """
        Shortcut to read an unsigned `byte` (1 byte)
        """
        return self.read(">B")[0]

    def read_char(self):
        # type: () -> UNICODE_TYPE
        """
        Shortcut to read a single `char` (2 bytes)
        """
        return unicode_char(self.read(">H")[0])

    def read_short(self):
        # type: () -> int
        """
        Shortcut to read a single `short` (2 bytes)
        """
        return self.read(">h")[0]

    def read_ushort(self):
        # type: () -> int
        """
        Shortcut to read an unsigned `short` (2 bytes)
        """
        return self.read(">H")[0]

    def read_int(self):
        # type: () -> int
        """
        Shortcut to read a single `int` (4 bytes)
        """
        return self.read(">i")[0]

    def read_float(self):
        # type: () -> float
        """
        Shortcut to read a single `float` (4 bytes)
        """
        return self.read(">f")[0]

    def read_long(self):
        # type: () -> int
        """
        Shortcut to read a single `long` (8 bytes)
        """
        return self.read(">q")[0]

    def read_double(self):
        # type: () -> float
        """
        Shortcut to read a single `double` (8 bytes)
        """
        return self.read(">d")[0]

    def read_UTF(self):  # pylint:disable=C0103
        # type: () -> str
        """
        Reads a Java string: its length in bytes, stored on an unsigned
        short, followed by its content encoded in modified UTF-8

        :return: The decoded string
        :raise EOFError: End of stream reached before the end of the string
        """
        length = self.read_ushort()
        ba = self.__fd.read(length)

        if len(ba) != length:
            # Same behaviour as read(): don't decode a truncated string
            raise EOFError("Stream has ended unexpectedly while parsing.")

        return decode_modified_utf8(ba)[0]
--git a/javaobj/v2/transformers.py b/javaobj/v2/transformers.py new file mode 100644 index 0000000..087eea9 --- /dev/null +++ b/javaobj/v2/transformers.py @@ -0,0 +1,534 @@ +#!/usr/bin/env python3 +""" +Defines the default object transformers + +:authors: Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Standard library +import functools +from typing import List, Optional, Tuple + +# Numpy (optional) +try: + import numpy +except ImportError: + numpy = None # type: ignore + +# Javaobj +from ..constants import TerminalCode, TypeCode +from ..utils import log_debug, log_error, read_string, read_struct, to_bytes +from .api import IJavaStreamParser, ObjectTransformer +from .beans import ( # pylint:disable=W0611 + BlockData, + JavaClassDesc, + JavaInstance, +) +from .stream import DataStreamReader + +# ------------------------------------------------------------------------------ + +# Module version +__version_info__ = (0, 4, 4) +__version__ = ".".join(str(x) for x in __version_info__) + +# Documentation strings format +__docformat__ = "restructuredtext en" + +# ------------------------------------------------------------------------------ + + +class JavaList(list, JavaInstance): + """ + Python-Java list bridge type + """ + + HANDLED_CLASSES = ("java.util.ArrayList", "java.util.LinkedList") + + def __init__(self): + list.__init__(self) + 
JavaInstance.__init__(self) + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from a parsed instance object + """ + # Lists have their content in there annotations + for cd, annotations in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + self.extend(ann for ann in annotations[1:]) + return True + + return False + + +@functools.total_ordering +class JavaPrimitiveClass(JavaInstance): + """ + Parent of Java classes matching a primitive (Bool, Integer, Long, ...) + """ + + def __init__(self): + JavaInstance.__init__(self) + self.value = None + + def __str__(self): + return str(self.value) + + def __repr__(self): + return repr(self.value) + + def __hash__(self): + return hash(self.value) + + def __eq__(self, other): + return self.value == other + + def __lt__(self, other): + return self.value < other + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from a parsed instance object + """ + for fields in self.field_data.values(): + for field, value in fields.items(): + if field.name == "value": + self.value = value + return True + + return False + + +class JavaBool(JavaPrimitiveClass): + """ + Represents a Java Boolean object + """ + + HANDLED_CLASSES = "java.lang.Boolean" + + def __bool__(self): + return self.value + + +class JavaInt(JavaPrimitiveClass): + """ + Represents a Java Integer or Long object + """ + + HANDLED_CLASSES = ("java.lang.Integer", "java.lang.Long") + + def __int__(self): + return self.value + + +class JavaMap(dict, JavaInstance): + """ + Python-Java dictionary/map bridge type + """ + + HANDLED_CLASSES = ( + "java.util.HashMap", + "java.util.TreeMap", + ) # type: Tuple[str, ...] 
+ + def __init__(self): + dict.__init__(self) + JavaInstance.__init__(self) + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from a parsed instance object + """ + # Maps have their content in there annotations + for cd, annotations in self.annotations.items(): + if cd.name in JavaMap.HANDLED_CLASSES: + # Group annotation elements 2 by 2 + args = [iter(annotations[1:])] * 2 + for key, value in zip(*args): + self[key] = value + + return True + + return False + + +class JavaLinkedHashMap(JavaMap): + """ + Linked has map are handled with a specific block data + """ + + HANDLED_CLASSES = ("java.util.LinkedHashMap",) + + def load_from_blockdata(self, parser, reader, indent=0): + # type: (IJavaStreamParser, DataStreamReader, int) -> bool + """ + Loads the content of the map, written with a custom implementation + """ + # Read HashMap fields + self.buckets = reader.read_int() + self.size = reader.read_int() + + # Read entries + for _ in range(self.size): + key_code = reader.read_byte() + key = parser._read_content(key_code, True) + + value_code = reader.read_byte() + value = parser._read_content(value_code, True) + self[key] = value + + # Ignore the end of the blockdata + type_code = reader.read_byte() + if type_code != TerminalCode.TC_ENDBLOCKDATA: + raise ValueError("Didn't find the end of block data") + + # Ignore the trailing 0 + final_byte = reader.read_byte() + if final_byte != 0: + raise ValueError("Should find 0x0, got {0:x}".format(final_byte)) + + return True + + +class JavaSet(set, JavaInstance): + """ + Python-Java set bridge type + """ + + HANDLED_CLASSES = ( + "java.util.HashSet", + "java.util.LinkedHashSet", + ) # type: Tuple[str, ...] 
+ + def __init__(self): + set.__init__(self) + JavaInstance.__init__(self) + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from a parsed instance object + """ + # Lists have their content in there annotations + for cd, annotations in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + self.update(x for x in annotations[1:]) + return True + + return False + + +class JavaTreeSet(JavaSet): + """ + Tree sets are handled a bit differently + """ + + HANDLED_CLASSES = ("java.util.TreeSet",) + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from a parsed instance object + """ + # Lists have their content in there annotations + for cd, annotations in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + # Annotation[1] == size of the set + self.update(x for x in annotations[2:]) + return True + + return False + + +class JavaTime(JavaInstance): + """ + Represents the classes found in the java.time package + + The semantic of the fields depends on the type of time that has been + parsed + """ + + HANDLED_CLASSES = ("java.time.Ser",) # type: Tuple[str, ...] 
+ + DURATION_TYPE = 1 + INSTANT_TYPE = 2 + LOCAL_DATE_TYPE = 3 + LOCAL_TIME_TYPE = 4 + LOCAL_DATE_TIME_TYPE = 5 + ZONE_DATE_TIME_TYPE = 6 + ZONE_REGION_TYPE = 7 + ZONE_OFFSET_TYPE = 8 + OFFSET_TIME_TYPE = 9 + OFFSET_DATE_TIME_TYPE = 10 + YEAR_TYPE = 11 + YEAR_MONTH_TYPE = 12 + MONTH_DAY_TYPE = 13 + PERIOD_TYPE = 14 + + def __init__(self): + JavaInstance.__init__(self) + self.type = -1 + self.year = None + self.month = None + self.day = None + self.hour = None + self.minute = None + self.second = None + self.nano = None + self.offset = None + self.zone = None + + self.time_handlers = { + self.DURATION_TYPE: self.do_duration, + self.INSTANT_TYPE: self.do_instant, + self.LOCAL_DATE_TYPE: self.do_local_date, + self.LOCAL_DATE_TIME_TYPE: self.do_local_date_time, + self.LOCAL_TIME_TYPE: self.do_local_time, + self.ZONE_DATE_TIME_TYPE: self.do_zoned_date_time, + self.ZONE_OFFSET_TYPE: self.do_zone_offset, + self.ZONE_REGION_TYPE: self.do_zone_region, + self.OFFSET_TIME_TYPE: self.do_offset_time, + self.OFFSET_DATE_TIME_TYPE: self.do_offset_date_time, + self.YEAR_TYPE: self.do_year, + self.YEAR_MONTH_TYPE: self.do_year_month, + self.MONTH_DAY_TYPE: self.do_month_day, + self.PERIOD_TYPE: self.do_period, + } + + def __str__(self): + return ( + "JavaTime(type=0x{s.type}, " + "year={s.year}, month={s.month}, day={s.day}, " + "hour={s.hour}, minute={s.minute}, second={s.second}, " + "nano={s.nano}, offset={s.offset}, zone={s.zone})" + ).format(s=self) + + def load_from_blockdata(self, parser, reader, indent=0): + """ + Ignore the SC_BLOCK_DATA flag + """ + return True + + def load_from_instance(self, indent=0): + # type: (int) -> bool + """ + Load content from a parsed instance object + """ + # Lists have their content in there annotations + for cd, annotations in self.annotations.items(): + if cd.name in self.HANDLED_CLASSES: + if not isinstance(annotations[0], BlockData): + raise ValueError("Require a BlockData as annotation") + + # Convert back annotations to bytes + # 
latin-1 is used to ensure that bytes are kept as is + content = to_bytes(annotations[0].data, "latin1") + (self.type,), content = read_struct(content, ">b") + + try: + self.time_handlers[self.type](content) + except KeyError as ex: + log_error("Unhandled kind of time: {}".format(ex)) + + return True + + return False + + def do_duration(self, data): + (self.second, self.nano), data = read_struct(data, ">qi") + return data + + def do_instant(self, data): + (self.second, self.nano), data = read_struct(data, ">qi") + return data + + def do_local_date(self, data): + (self.year, self.month, self.day), data = read_struct(data, ">ibb") + return data + + def do_local_time(self, data): + (hour,), data = read_struct(data, ">b") + minute = 0 + second = 0 + nano = 0 + + if hour < 0: + hour = ~hour + else: + (minute,), data = read_struct(data, ">b") + if minute < 0: + minute = ~minute + else: + (second,), data = read_struct(data, ">b") + if second < 0: + second = ~second + else: + (nano,), data = read_struct(data, ">i") + + self.hour = hour + self.minute = minute + self.second = second + self.nano = nano + return data + + def do_local_date_time(self, data): + data = self.do_local_date(data) + data = self.do_local_time(data) + return data + + def do_zoned_date_time(self, data): + data = self.do_local_date_time(data) + data = self.do_zone_offset(data) + data = self.do_zone_region(data) + return data + + def do_zone_offset(self, data): + (offset_byte,), data = read_struct(data, ">b") + if offset_byte == 127: + (self.offset,), data = read_struct(data, ">i") + else: + self.offset = offset_byte * 900 + return data + + def do_zone_region(self, data): + self.zone, data = read_string(data) + return data + + def do_offset_time(self, data): + data = self.do_local_time(data) + data = self.do_zone_offset(data) + return data + + def do_offset_date_time(self, data): + data = self.do_local_date_time(data) + data = self.do_zone_offset(data) + return data + + def do_year(self, data): + 
(self.year,), data = read_struct(data, ">i") + return data + + def do_year_month(self, data): + (self.year, self.month), data = read_struct(data, ">ib") + return data + + def do_month_day(self, data): + (self.month, self.day), data = read_struct(data, ">bb") + return data + + def do_period(self, data): + (self.year, self.month, self.day), data = read_struct(data, ">iii") + return data + + +class DefaultObjectTransformer(ObjectTransformer): + """ + Provider of the default object transformers + """ + + KNOWN_TRANSFORMERS = ( + JavaBool, + JavaInt, + JavaList, + JavaMap, + JavaLinkedHashMap, + JavaSet, + JavaTreeSet, + JavaTime, + ) + + def __init__(self): + # Construct the link: Java class name -> Python transformer + self._type_mapper = {} + for transformer_class in self.KNOWN_TRANSFORMERS: + handled_classes = transformer_class.HANDLED_CLASSES + if isinstance(handled_classes, str): + # Single class handled + self._type_mapper[handled_classes] = transformer_class + else: + # Multiple classes handled + for class_name in transformer_class.HANDLED_CLASSES: + self._type_mapper[class_name] = transformer_class + + def create_instance(self, classdesc): + # type: (JavaClassDesc) -> Optional[JavaInstance] + """ + Transforms a parsed Java object into a Python object + + :param classdesc: The description of a Java class + :return: The Python form of the object, or the original JavaObject + """ + try: + mapped_type = self._type_mapper[classdesc.name] + except KeyError: + # Return None if not handled + return None + else: + log_debug("---") + log_debug(classdesc.name) + log_debug("---") + + java_object = mapped_type() + java_object.classdesc = classdesc + + log_debug(">>> java_object: {0}".format(java_object)) + return java_object + + +class NumpyArrayTransformer(ObjectTransformer): + """ + Loads arrays as numpy arrays if possible + """ + + # Convertion of a Java type char to its NumPy equivalent + NUMPY_TYPE_MAP = { + TypeCode.TYPE_BYTE: "B", + TypeCode.TYPE_CHAR: "b", + 
TypeCode.TYPE_DOUBLE: ">d", + TypeCode.TYPE_FLOAT: ">f", + TypeCode.TYPE_INTEGER: ">i", + TypeCode.TYPE_LONG: ">l", + TypeCode.TYPE_SHORT: ">h", + TypeCode.TYPE_BOOLEAN: ">B", + } + + def load_array(self, reader, type_code, size): + # type: (DataStreamReader, TypeCode, int) -> Optional[list] + """ + Loads a Java array, if possible + """ + if numpy is not None: + try: + dtype = self.NUMPY_TYPE_MAP[type_code] + except KeyError: + # Unhandled data type + return None + else: + return numpy.fromfile( + reader.file_descriptor, dtype=dtype, count=size, + ) + + return None diff --git a/manifest.in b/manifest.in new file mode 100644 index 0000000..cf4e570 --- /dev/null +++ b/manifest.in @@ -0,0 +1,8 @@ +# Include the README +include README.md + +# Include the authors file +include AUTHORS + +# Include the license file +include LICENSE diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8789351 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,58 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["javaobj"] + +[project] +name = "javaobj-py3" +version = "0.4.4" +description = "Module for serializing and de-serializing Java objects." 
+readme = "README.md" +license = "Apache-2.0" +authors = [ + { name = "Volodymyr Buell", email = "vbuell@gmail.com" } +] +maintainers = [ + { name = "Thomas Calmant", email = "thomas.calmant@gmail.com" } +] +keywords = ["python", "java", "marshalling", "serialization"] +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules" +] + +dependencies = [ + "enum34; python_version<='3.4'", + "typing; python_version<='3.4'" +] + +[project.optional-dependencies] +test = ["pytest"] + +[project.urls] +Homepage = "https://github.com/tcalmant/python-javaobj" +Issues = "http://github.com/tcalmant/python-javaobj/issues" +Source = "http://github.com/tcalmant/python-javaobj/" + +[tool.hatch.envs.test] +dependencies = ["pytest"] + +[tool.hatch.envs.test.scripts] +run = "pytest tests" + +[tool.black] +line-length = 79 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..17b0412 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +enum34;python_version<="3.4" +typing;python_version<="3.4" diff --git a/setup.py b/setup.py index dbe219a..cf93fb8 100644 --- a/setup.py +++ b/setup.py @@ -7,12 +7,12 @@ :authors: Volodymyr Buell, Thomas Calmant :license: Apache License 2.0 -:version: 0.2.4 +:version: 0.4.4 :status: Alpha .. 
- Copyright 2016 Thomas Calmant + Copyright 2024 Thomas Calmant Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ # ------------------------------------------------------------------------------ # Module version -__version_info__ = (0, 2, 4) +__version_info__ = (0, 4, 4) __version__ = ".".join(str(x) for x in __version_info__) # Documentation strings format @@ -53,8 +53,10 @@ def read(fname): with open(os.path.join(os.path.dirname(__file__), fname)) as fd: return fd.read() + # ------------------------------------------------------------------------------ + setup( name="javaobj-py3", version=__version__, @@ -64,18 +66,31 @@ def read(fname): maintainer_email="thomas.calmant@gmail.com", url="https://github.com/tcalmant/python-javaobj", description="Module for serializing and de-serializing Java objects.", - license='Apache License 2.0', + license="Apache License 2.0", + license_file="LICENSE", keywords="python java marshalling serialization", - py_modules=['javaobj'], - test_suite="tests.tests", - long_description=read('README.rst'), + packages=["javaobj", "javaobj.v1", "javaobj.v2"], + test_suite="tests", + install_requires=[ + 'enum34;python_version<="3.4"', + 'typing;python_version<="3.4"', + ], + long_description=read("README.md"), + long_description_content_type="text/markdown", classifiers=[ - "Development Status :: 3 - Alpha", - "License :: OSI Approved :: Apache Software License", - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - "Topic :: Software Development :: Libraries :: Python Modules", - ]) + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python 
:: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", + ], +) diff --git a/tests/java/.classpath b/tests/java/.classpath deleted file mode 100644 index 61c3fab..0000000 --- a/tests/java/.classpath +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/tests/java/.gitignore b/tests/java/.gitignore index 073063f..562f411 100644 --- a/tests/java/.gitignore +++ b/tests/java/.gitignore @@ -4,3 +4,8 @@ target/ # Generated files *.ser +# Project files +.idea/ +.classpath +.project +.settings/ diff --git a/tests/java/pom.xml b/tests/java/pom.xml index 12f7250..d4cc75d 100644 --- a/tests/java/pom.xml +++ b/tests/java/pom.xml @@ -9,14 +9,16 @@ UTF-8 + 1.7 + 1.7 junit junit - 4.9 + 4.13.1 test - \ No newline at end of file + diff --git a/tests/java/src/test/java/OneTest.java b/tests/java/src/test/java/OneTest.java index 7e2025f..7ffb10a 100644 --- a/tests/java/src/test/java/OneTest.java +++ b/tests/java/src/test/java/OneTest.java @@ -1,13 +1,29 @@ import java.awt.event.WindowAdapter; import java.awt.event.WindowEvent; import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.HashMap; +import java.util.HashSet; import java.util.Hashtable; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import 
java.util.TreeSet; import java.util.Vector; +import java.util.Random; +import java.util.zip.GZIPOutputStream; import javax.swing.JScrollPane; import javax.swing.SwingUtilities; @@ -154,10 +170,16 @@ private void writeObject(final ObjectOutputStream oos) ObjectOutputStream oos; + /** + * Returns the name of the file where to serialize the test content + */ + private String getTestFileName() { + return name.getMethodName() + ".ser"; + } + @Before public void setUp() throws Exception { - oos = new ObjectOutputStream(fos = new FileOutputStream( - name.getMethodName() + ".ser")); + oos = new ObjectOutputStream(fos = new FileOutputStream(getTestFileName())); } @Test @@ -194,6 +216,60 @@ public void testChar() throws IOException { public void testChars() throws IOException { oos.writeChars("python-javaobj"); oos.close(); + + // Also compress the file + final String serializedFileName = getTestFileName(); + final String gzippedFileName = serializedFileName + ".gz"; + + try (final GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(gzippedFileName))){ + try (final FileInputStream in = new FileInputStream(serializedFileName)){ + final byte[] buffer = new byte[1024]; + int len; + while((len = in.read(buffer)) != -1){ + out.write(buffer, 0, len); + } + } + } + } + + @Test + public void testCharArray() throws IOException { + char[] array = new char[] { + '\u0000', '\ud800', + '\u0001', '\udc00', + '\u0002', '\uffff', + '\u0003' + }; + oos.writeObject(array); + oos.close(); + } + + @Test + public void test2DArray() throws IOException { + int[][] array = new int[][] { + new int[] {1, 2, 3}, + new int[] {4, 5, 6}, + }; + oos.writeObject(array); + oos.close(); + } + + @Test + public void testClassArray() throws IOException { + Class[] array = new Class[] { + Integer.class, + ObjectOutputStream.class, + Exception.class, + }; + oos.writeObject(array); + oos.close(); + } + + @Test + public void testJapan() throws IOException { + String stateOfJapan = "日本国"; + 
oos.writeObject(stateOfJapan); + oos.close(); } @Test @@ -249,6 +325,81 @@ public void testSuper() throws Exception { oos.flush(); } + @Test + public void testHashSet() throws Exception { + final Set set = new HashSet(); + set.add(1); + set.add(2); + set.add(1); + set.add(42); + oos.writeObject(set); + oos.flush(); + } + + @Test + public void testLinkedHashSet() throws Exception { + final Set set = new LinkedHashSet(); + set.add(1); + set.add(2); + set.add(1); + set.add(42); + oos.writeObject(set); + oos.flush(); + } + + @Test + public void testTreeSet() throws Exception { + final Set set = new TreeSet(); + set.add(1); + set.add(2); + set.add(1); + set.add(42); + oos.writeObject(set); + oos.flush(); + } + + @Test + public void testTime() throws Exception { + oos.writeObject(new Object[] { + Duration.ofSeconds(10), + Instant.now(), + LocalDate.now(), + LocalTime.now(), + LocalDateTime.now(), + ZoneId.systemDefault(), + ZonedDateTime.now(), + }); + oos.flush(); + } + + /** + * Tests th pull request #27 by @qistoph: + * Add support for java.lang.Bool, Integer and Long classes + */ + @Test + public void testBoolIntLong() throws Exception { + Map hm1 = new HashMap(); + hm1.put("key1", "value1"); + hm1.put("key2", "value2"); + hm1.put("int", 9); + hm1.put("int2", new Integer(10)); + hm1.put("bool", true); + hm1.put("bool2", new Boolean(true)); + + oos.writeObject(hm1); + oos.flush(); + + Map hm2 = new HashMap(); + hm2.put("subMap", hm1); + + ObjectOutputStream oos2 = new ObjectOutputStream(new FileOutputStream(name.getMethodName() + "-2.ser")); + try { + oos2.writeObject(hm2); + } finally { + oos2.close(); + } + } + @Test public void testSwingObject() throws Exception { @@ -281,115 +432,70 @@ public void windowClosing(final WindowEvent e) { }); } - // public void test_readObject() throws Exception { - // String s = "HelloWorld"; - // oos.writeObject(s); - // oos.close(); - // ois = new ObjectInputStream(new ByteArrayInputStream(bao.toByteArray())); - // 
assertEquals("Read incorrect Object value", s, ois.readObject()); - // ois.close(); - // - // // Regression for HARMONY-91 - // // dynamically create serialization byte array for the next hierarchy: - // // - class A implements Serializable - // // - class C extends A - // - // byte[] cName = C.class.getName().getBytes("UTF-8"); - // byte[] aName = A.class.getName().getBytes("UTF-8"); - // - // ByteArrayOutputStream out = new ByteArrayOutputStream(); - // - // byte[] begStream = new byte[] { (byte) 0xac, (byte) 0xed, // STREAM_MAGIC - // (byte) 0x00, (byte) 0x05, // STREAM_VERSION - // (byte) 0x73, // TC_OBJECT - // (byte) 0x72, // TC_CLASSDESC - // (byte) 0x00, // only first byte for C class name length - // }; - // - // out.write(begStream, 0, begStream.length); - // out.write(cName.length); // second byte for C class name length - // out.write(cName, 0, cName.length); // C class name - // - // byte[] midStream = new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0x00, - // (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, - // (byte) 0x21, // serialVersionUID = 33L - // (byte) 0x02, // flags - // (byte) 0x00, (byte) 0x00, // fields : none - // (byte) 0x78, // TC_ENDBLOCKDATA - // (byte) 0x72, // Super class for C: TC_CLASSDESC for A class - // (byte) 0x00, // only first byte for A class name length - // }; - // - // out.write(midStream, 0, midStream.length); - // out.write(aName.length); // second byte for A class name length - // out.write(aName, 0, aName.length); // A class name - // - // byte[] endStream = new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0x00, - // (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, - // (byte) 0x0b, // serialVersionUID = 11L - // (byte) 0x02, // flags - // (byte) 0x00, (byte) 0x01, // fields - // - // (byte) 0x4c, // field description: type L (object) - // (byte) 0x00, (byte) 0x04, // length - // // field = 'name' - // (byte) 0x6e, (byte) 0x61, (byte) 0x6d, (byte) 0x65, - // - // (byte) 0x74, // className1: TC_STRING - // (byte) 
0x00, (byte) 0x12, // length - // // - // (byte) 0x4c, (byte) 0x6a, (byte) 0x61, (byte) 0x76, - // (byte) 0x61, (byte) 0x2f, (byte) 0x6c, (byte) 0x61, - // (byte) 0x6e, (byte) 0x67, (byte) 0x2f, (byte) 0x53, - // (byte) 0x74, (byte) 0x72, (byte) 0x69, (byte) 0x6e, - // (byte) 0x67, (byte) 0x3b, - // - // (byte) 0x78, // TC_ENDBLOCKDATA - // (byte) 0x70, // NULL super class for A class - // - // // classdata - // (byte) 0x74, // TC_STRING - // (byte) 0x00, (byte) 0x04, // length - // (byte) 0x6e, (byte) 0x61, (byte) 0x6d, (byte) 0x65, // value - // }; - // - // out.write(endStream, 0, endStream.length); - // out.flush(); - // - // // read created serial. form - // ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream( - // out.toByteArray())); - // Object o = ois.readObject(); - // assertEquals(C.class, o.getClass()); - // - // // Regression for HARMONY-846 - // assertNull(new ObjectInputStream() {}.readObject()); - // } + /** + * Tests the pull request #38 by @UruDev: + * Add support for custom writeObject + */ + @Test + public void testCustomWriteObject() throws Exception { + CustomClass writer = new CustomClass(); + writer.start(oos); + } } class SuperAaaa implements Serializable { - - /** - * - */ private static final long serialVersionUID = 1L; public boolean bool = true; public int integer = -1; public String superString = "Super!!"; - } class TestConcrete extends SuperAaaa implements Serializable { - - /** - * - */ private static final long serialVersionUID = 1L; public String childString = "Child!!"; TestConcrete() { super(); } +} + +//Custom writeObject section +class CustomClass implements Serializable { + private static final long serialVersionUID = 1; + + public void start(ObjectOutputStream out) throws Exception { + this.writeObject(out); + } + + private void writeObject(ObjectOutputStream out) throws IOException { + CustomWriter custom = new CustomWriter(42); + out.writeObject(custom); + out.flush(); + } +} + +class RandomChild extends 
Random { + private static final long serialVersionUID = 1; + private int num = 1; + private double doub = 4.5; + + RandomChild(int seed) { + super(seed); + } +} + +class CustomWriter implements Serializable { + protected RandomChild custom_obj = null; + + CustomWriter(int seed) { + custom_obj = new RandomChild(seed); + } + private static final long serialVersionUID = 1; + private static final int CURRENT_SERIAL_VERSION = 0; + private void writeObject(ObjectOutputStream out) throws IOException { + out.writeInt(CURRENT_SERIAL_VERSION); + out.writeObject(custom_obj); + } } diff --git a/tests/java/testChars.ser.gz b/tests/java/testChars.ser.gz new file mode 100644 index 0000000..f1f9db1 Binary files /dev/null and b/tests/java/testChars.ser.gz differ diff --git a/tests/test2DArray.ser b/tests/test2DArray.ser new file mode 100644 index 0000000..d0f58dc Binary files /dev/null and b/tests/test2DArray.ser differ diff --git a/tests/testBoolIntLong-2.ser b/tests/testBoolIntLong-2.ser new file mode 100644 index 0000000..aae4a2b Binary files /dev/null and b/tests/testBoolIntLong-2.ser differ diff --git a/tests/testBoolIntLong.ser b/tests/testBoolIntLong.ser new file mode 100644 index 0000000..daa6bc1 Binary files /dev/null and b/tests/testBoolIntLong.ser differ diff --git a/tests/testChars.ser.gz b/tests/testChars.ser.gz new file mode 100644 index 0000000..f1f9db1 Binary files /dev/null and b/tests/testChars.ser.gz differ diff --git a/tests/testClassArray.ser b/tests/testClassArray.ser new file mode 100644 index 0000000..e5501ae Binary files /dev/null and b/tests/testClassArray.ser differ diff --git a/tests/testCustomWriteObject.ser b/tests/testCustomWriteObject.ser new file mode 100644 index 0000000..72e77af Binary files /dev/null and b/tests/testCustomWriteObject.ser differ diff --git a/tests/testHashSet.ser b/tests/testHashSet.ser new file mode 100644 index 0000000..85776fd Binary files /dev/null and b/tests/testHashSet.ser differ diff --git a/tests/testJapan.ser 
b/tests/testJapan.ser new file mode 100644 index 0000000..2c3634a Binary files /dev/null and b/tests/testJapan.ser differ diff --git a/tests/testLinkedHashSet.ser b/tests/testLinkedHashSet.ser new file mode 100644 index 0000000..449edd8 Binary files /dev/null and b/tests/testLinkedHashSet.ser differ diff --git a/tests/testTime.ser b/tests/testTime.ser new file mode 100644 index 0000000..92fe968 Binary files /dev/null and b/tests/testTime.ser differ diff --git a/tests/testTreeSet.ser b/tests/testTreeSet.ser new file mode 100644 index 0000000..2efdfa1 Binary files /dev/null and b/tests/testTreeSet.ser differ diff --git a/tests/tests.py b/tests/test_v1.py similarity index 62% rename from tests/tests.py rename to tests/test_v1.py index cfe358f..162b2db 100644 --- a/tests/tests.py +++ b/tests/test_v1.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -- Content-Encoding: UTF-8 -- +# -- Content-Encoding: utf-8 -- """ Tests for javaobj @@ -8,12 +8,12 @@ :authors: Volodymyr Buell, Thomas Calmant :license: Apache License 2.0 -:version: 0.2.3 +:version: 0.4.4 :status: Alpha .. - Copyright 2016 Thomas Calmant + Copyright 2024 Thomas Calmant Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,18 +28,22 @@ limitations under the License. 
""" +# Print is used in tests +from __future__ import print_function + # Standard library import logging -import subprocess -import unittest import os +import subprocess import sys +import unittest # Prepare Python path to import javaobj sys.path.insert(0, os.path.abspath(os.path.dirname(os.getcwd()))) # Local -import javaobj +import javaobj.v1 as javaobj +from javaobj.utils import hexdump, java_data_fd # ------------------------------------------------------------------------------ @@ -51,10 +55,11 @@ # ------------------------------------------------------------------------------ -class TestJavaobj(unittest.TestCase): +class TestJavaobjV1(unittest.TestCase): """ - Full test suite for javaobj + Full test suite for javaobj V1 parser """ + @classmethod def setUpClass(cls): """ @@ -62,13 +67,14 @@ def setUpClass(cls): data """ # Compute the java directory - java_dir = os.path.join(os.path.dirname(__file__), 'java') + java_dir = os.path.join(os.path.dirname(__file__), "java") - # Run Maven and go back to the working folder - cwd = os.getcwd() - os.chdir(java_dir) - subprocess.call('mvn test', shell=True) - os.chdir(cwd) + if not os.getenv("JAVAOBJ_NO_MAVEN"): + # Run Maven and go back to the working folder + cwd = os.getcwd() + os.chdir(java_dir) + subprocess.call("mvn test", shell=True) + os.chdir(cwd) def read_file(self, filename, stream=False): """ @@ -78,9 +84,10 @@ def read_file(self, filename, stream=False): :param stream: If True, return the file stream :return: File content or stream """ - for subfolder in ('java', ''): + for subfolder in ("java", ""): found_file = os.path.join( - os.path.dirname(__file__), subfolder, filename) + os.path.dirname(__file__), subfolder, filename + ) if os.path.exists(found_file): break else: @@ -89,26 +96,25 @@ def read_file(self, filename, stream=False): if stream: return open(found_file, "rb") else: - with open(found_file, 'rb') as filep: + with open(found_file, "rb") as filep: return filep.read() def _try_marshalling(self, 
original_stream, original_object): """ Tries to marshall an object and compares it to the original stream """ + _logger.debug("Try Marshalling") marshalled_stream = javaobj.dumps(original_object) # Reloading the new dump allows to compare the decoding sequence try: javaobj.loads(marshalled_stream) self.assertEqual(original_stream, marshalled_stream) - except: + except Exception: print("-" * 80) print("=" * 30, "Original", "=" * 30) - print(javaobj.JavaObjectUnmarshaller._create_hexdump( - original_stream)) + print(hexdump(original_stream)) print("*" * 30, "Marshalled", "*" * 30) - print(javaobj.JavaObjectUnmarshaller._create_hexdump( - marshalled_stream)) + print(hexdump(marshalled_stream)) print("-" * 80) raise @@ -119,7 +125,7 @@ def test_char_rw(self): jobj = self.read_file("testChar.ser") pobj = javaobj.loads(jobj) _logger.debug("Read char object: %s", pobj) - self.assertEqual(pobj, '\x00C') + self.assertEqual(pobj, "\x00C") self._try_marshalling(jobj, pobj) def test_chars_rw(self): @@ -135,6 +141,34 @@ def test_chars_rw(self): self.assertEqual(pobj, expected) self._try_marshalling(jobj, pobj) + def test_gzip_open(self): + """ + Tests if the GZip auto-uncompress works + """ + with java_data_fd(self.read_file("testChars.ser", stream=True)) as fd: + base = fd.read() + + with java_data_fd( + self.read_file("testChars.ser.gz", stream=True) + ) as fd: + gzipped = fd.read() + + self.assertEqual( + base, gzipped, "Uncompressed content doesn't match the original" + ) + + def test_chars_gzip(self): + """ + Reads testChars.ser.gz + """ + # Expected string as a UTF-16 string + expected = "python-javaobj".encode("utf-16-be").decode("latin1") + + jobj = self.read_file("testChars.ser.gz") + pobj = javaobj.loads(jobj) + _logger.debug("Read char objects: %s", pobj) + self.assertEqual(pobj, expected) + def test_double_rw(self): """ Reads testDouble.ser and checks the serialization process @@ -143,7 +177,7 @@ def test_double_rw(self): pobj = javaobj.loads(jobj) 
_logger.debug("Read double object: %s", pobj) - self.assertEqual(pobj, '\x7f\xef\xff\xff\xff\xff\xff\xff') + self.assertEqual(pobj, "\x7f\xef\xff\xff\xff\xff\xff\xff") self._try_marshalling(jobj, pobj) def test_bytes_rw(self): @@ -154,10 +188,13 @@ def test_bytes_rw(self): pobj = javaobj.loads(jobj) _logger.debug("Read bytes: %s", pobj) - self.assertEqual(pobj, 'HelloWorld') + self.assertEqual(pobj, "HelloWorld") self._try_marshalling(jobj, pobj) def test_class_with_byte_array_rw(self): + """ + Tests handling of classes containing a Byte Array + """ jobj = self.read_file("testClassWithByteArray.ser") pobj = javaobj.loads(jobj) @@ -201,7 +238,7 @@ def test_fields(self): pobj = javaobj.loads(jobj) _logger.debug("Read object: %s", pobj) - self.assertEqual(pobj.aField1, 'Gabba') + self.assertEqual(pobj.aField1, u"Gabba") self.assertEqual(pobj.aField2, None) classdesc = pobj.get_class() @@ -224,7 +261,7 @@ def test_class(self): jobj = self.read_file("testClass.ser") pobj = javaobj.loads(jobj) _logger.debug("Read object: %s", pobj) - self.assertEqual(pobj.name, 'java.lang.String') + self.assertEqual(pobj.name, "java.lang.String") self._try_marshalling(jobj, pobj) # def test_swing_object(self): @@ -241,6 +278,9 @@ def test_class(self): # _logger.debug(".. 
Fields Types: %s", classdesc.fields_types) def test_super(self): + """ + Tests basic class inheritance handling + """ jobj = self.read_file("objSuper.ser") pobj = javaobj.loads(jobj) _logger.debug(pobj) @@ -250,14 +290,17 @@ def test_super(self): _logger.debug(classdesc.fields_names) _logger.debug(classdesc.fields_types) - self.assertEqual(pobj.childString, "Child!!") + self.assertEqual(pobj.childString, u"Child!!") self.assertEqual(pobj.bool, True) self.assertEqual(pobj.integer, -1) - self.assertEqual(pobj.superString, "Super!!") + self.assertEqual(pobj.superString, u"Super!!") self._try_marshalling(jobj, pobj) def test_arrays(self): + """ + Tests handling of Java arrays + """ jobj = self.read_file("objArrays.ser") pobj = javaobj.loads(jobj) _logger.debug(pobj) @@ -280,14 +323,57 @@ def test_arrays(self): self._try_marshalling(jobj, pobj) + def test_japan(self): + """ + Tests the UTF encoding handling with Japanese characters + """ + # Japan.ser contains a string using wide characters: the name of the + # state from Japan (according to wikipedia) + jobj = self.read_file("testJapan.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + # Compare the UTF-8 encoded version of the name + self.assertEqual( + pobj, b"\xe6\x97\xa5\xe6\x9c\xac\xe5\x9b\xbd".decode("utf-8") + ) + self._try_marshalling(jobj, pobj) + def test_char_array(self): + """ + Tests the loading of a wide-char array + """ jobj = self.read_file("testCharArray.ser") pobj = javaobj.loads(jobj) _logger.debug(pobj) - self.assertEqual(pobj, [u'\u0000', u'\ud800', u'\u0001', u'\udc00', u'\u0002', u'\uffff', u'\u0003']) + self.assertEqual( + pobj, + [ + u"\u0000", + u"\ud800", + u"\u0001", + u"\udc00", + u"\u0002", + u"\uffff", + u"\u0003", + ], + ) self._try_marshalling(jobj, pobj) + def test_2d_array(self): + """ + Tests the handling of a 2D array + """ + jobj = self.read_file("test2DArray.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertEqual( + pobj, [[1, 2, 3], [4, 5, 6],], + ) + 
def test_enums(self): + """ + Tests the handling of "enum" types + """ jobj = self.read_file("objEnums.ser") pobj = javaobj.loads(jobj) _logger.debug(pobj) @@ -299,14 +385,49 @@ def test_enums(self): self.assertEqual(classdesc.name, "ClassWithEnum") self.assertEqual(pobj.color.classdesc.name, "Color") - self.assertEqual(pobj.color.constant, "GREEN") + self.assertEqual(pobj.color.constant, u"GREEN") - for color, intended in zip(pobj.colors, ("GREEN", "BLUE", "RED")): + for color, intended in zip(pobj.colors, (u"GREEN", u"BLUE", u"RED")): self.assertEqual(color.classdesc.name, "Color") self.assertEqual(color.constant, intended) # self._try_marshalling(jobj, pobj) + def test_sets(self): + """ + Tests handling of HashSet and TreeSet + """ + for filename in ( + "testHashSet.ser", + "testTreeSet.ser", + "testLinkedHashSet.ser", + ): + _logger.debug("Loading file: %s", filename) + jobj = self.read_file(filename) + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertIsInstance(pobj, set) + self.assertSetEqual({i.value for i in pobj}, {1, 2, 42}) + + def test_times(self): + """ + Tests the handling of java.time classes + """ + jobj = self.read_file("testTime.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + # First one is a duration of 10s + duration = pobj[0] + self.assertEqual(duration.second, 10) + + # Check types + self.assertIsInstance(pobj, javaobj.beans.JavaArray) + for obj in pobj: + self.assertIsInstance( + obj, javaobj.DefaultObjectTransformer.JavaTime + ) + # def test_exception(self): # jobj = self.read_file("objException.ser") # pobj = javaobj.loads(jobj) @@ -321,19 +442,23 @@ def test_enums(self): # self.assertEqual(classdesc.name, "MyExceptionWhenDumping") def test_sun_example(self): - marshaller = javaobj.JavaObjectUnmarshaller( - self.read_file("sunExample.ser", stream=True)) - pobj = marshaller.readObject() + marshaller = javaobj.JavaObjectUnmarshaller( + self.read_file("sunExample.ser", stream=True) + ) + pobj = marshaller.readObject() 
- self.assertEqual(pobj.value, 17) - self.assertTrue(pobj.next) + self.assertEqual(pobj.value, 17) + self.assertTrue(pobj.next) - pobj = marshaller.readObject() + pobj = marshaller.readObject() - self.assertEqual(pobj.value, 19) - self.assertFalse(pobj.next) + self.assertEqual(pobj.value, 19) + self.assertFalse(pobj.next) def test_collections(self): + """ + Tests the handling of ArrayList, LinkedList and HashMap + """ jobj = self.read_file("objCollections.ser") pobj = javaobj.loads(jobj) _logger.debug(pobj) @@ -349,14 +474,45 @@ def test_collections(self): # self._try_marshalling(jobj, pobj) def test_jceks_issue_5(self): + """ + Tests the handling of JCEKS issue #5 + """ jobj = self.read_file("jceks_issue_5.ser") pobj = javaobj.loads(jobj) _logger.info(pobj) # self._try_marshalling(jobj, pobj) + def test_qistoph_pr_27(self): + """ + Tests support for Bool, Integer, Long classes (PR #27) + """ + # Load the basic map + jobj = self.read_file("testBoolIntLong.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + # Basic checking + self.assertEqual(pobj[u"key1"], u"value1") + self.assertEqual(pobj[u"key2"], u"value2") + self.assertEqual(pobj[u"int"], 9) + self.assertEqual(pobj[u"int2"], 10) + self.assertEqual(pobj[u"bool"], True) + self.assertEqual(pobj[u"bool2"], True) + + # Load the parent map + jobj2 = self.read_file("testBoolIntLong-2.ser") + pobj2 = javaobj.loads(jobj2) + _logger.debug(pobj2) + + parent_map = pobj2[u"subMap"] + for key, value in pobj.items(): + self.assertEqual(parent_map[key], value) + + # ------------------------------------------------------------------------------ -if __name__ == '__main__': + +if __name__ == "__main__": # Setup logging logging.basicConfig(level=logging.INFO) diff --git a/tests/test_v2.py b/tests/test_v2.py new file mode 100644 index 0000000..301db9c --- /dev/null +++ b/tests/test_v2.py @@ -0,0 +1,656 @@ +#!/usr/bin/python +# -- Content-Encoding: utf-8 -- +""" +Tests for javaobj + +See: 
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html + +:authors: Volodymyr Buell, Thomas Calmant +:license: Apache License 2.0 +:version: 0.4.4 +:status: Alpha + +.. + + Copyright 2024 Thomas Calmant + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +# Print is used in tests +from __future__ import print_function + +# Standard library +import logging +import os +import struct +import subprocess +import sys +import unittest +from io import BytesIO + +# Prepare Python path to import javaobj +sys.path.insert(0, os.path.abspath(os.path.dirname(os.getcwd()))) + +import javaobj.v2 as javaobj + +# Local +from javaobj.utils import bytes_char, java_data_fd + +# ------------------------------------------------------------------------------ + +# Documentation strings format +__docformat__ = "restructuredtext en" + +_logger = logging.getLogger("javaobj.tests") + +# ------------------------------------------------------------------------------ + +# Custom writeObject parsing classes +class CustomWriterInstance(javaobj.beans.JavaInstance): + def __init__(self): + javaobj.beans.JavaInstance.__init__(self) + + def load_from_instance(self): + """ + Updates the content of this instance + from its parsed fields and annotations + :return: True on success, False on error + """ + if self.classdesc and self.classdesc in self.annotations: + fields = ["int_not_in_fields"] + self.classdesc.fields_names + raw_data = self.annotations[self.classdesc] + int_not_in_fields = 
struct.unpack( + ">i", BytesIO(raw_data[0].data).read(4) + )[0] + custom_obj = raw_data[1] + values = [int_not_in_fields, custom_obj] + self.field_data = dict(zip(fields, values)) + return True + + return False + + +class RandomChildInstance(javaobj.beans.JavaInstance): + def load_from_instance(self): + """ + Updates the content of this instance + from its parsed fields and annotations + :return: True on success, False on error + """ + if self.classdesc and self.classdesc in self.field_data: + fields = self.classdesc.fields_names + values = [ + self.field_data[self.classdesc][self.classdesc.fields[i]] + for i in range(len(fields)) + ] + self.field_data = dict(zip(fields, values)) + if ( + self.classdesc.super_class + and self.classdesc.super_class in self.annotations + ): + super_class = self.annotations[self.classdesc.super_class][0] + self.annotations = dict( + zip(super_class.fields_names, super_class.field_data) + ) + return True + + return False + + +class BaseTransformer(javaobj.transformers.ObjectTransformer): + """ + Creates a JavaInstance object with custom loading methods for the + classes it can handle + """ + + def __init__(self, handled_classes=None): + self.instance = None + self.handled_classes = handled_classes or {} + + def create_instance(self, classdesc): + """ + Transforms a parsed Java object into a Python object + + :param classdesc: The description of a Java class + :return: The Python form of the object, or the original JavaObject + """ + if classdesc.name in self.handled_classes: + self.instance = self.handled_classes[classdesc.name]() + return self.instance + + return None + + +class RandomChildTransformer(BaseTransformer): + def __init__(self): + super(RandomChildTransformer, self).__init__( + {"RandomChild": RandomChildInstance} + ) + + +class CustomWriterTransformer(BaseTransformer): + def __init__(self): + super(CustomWriterTransformer, self).__init__( + {"CustomWriter": CustomWriterInstance} + ) + + +class 
JavaRandomTransformer(BaseTransformer): + def __init__(self): + super(JavaRandomTransformer, self).__init__() + self.name = "java.util.Random" + self.field_names = ["haveNextNextGaussian", "nextNextGaussian", "seed"] + self.field_types = [ + javaobj.beans.FieldType.BOOLEAN, + javaobj.beans.FieldType.DOUBLE, + javaobj.beans.FieldType.LONG, + ] + + def load_custom_writeObject(self, parser, reader, name): + if name != self.name: + return None + + fields = [] + values = [] + for f_name, f_type in zip(self.field_names, self.field_types): + values.append(parser._read_field_value(f_type)) + fields.append(javaobj.beans.JavaField(f_type, f_name)) + + class_desc = javaobj.beans.JavaClassDesc( + javaobj.beans.ClassDescType.NORMALCLASS + ) + class_desc.name = self.name + class_desc.desc_flags = javaobj.beans.ClassDataType.EXTERNAL_CONTENTS + class_desc.fields = fields + class_desc.field_data = values + return class_desc + + +# ------------------------------------------------------------------------------ + + +class TestJavaobjV2(unittest.TestCase): + """ + Full test suite for javaobj V2 Parser + """ + + @classmethod + def setUpClass(cls): + """ + Calls Maven to compile & run Java classes that will generate serialized + data + """ + # Compute the java directory + java_dir = os.path.join(os.path.dirname(__file__), "java") + + if not os.getenv("JAVAOBJ_NO_MAVEN"): + # Run Maven and go back to the working folder + cwd = os.getcwd() + os.chdir(java_dir) + subprocess.call("mvn test", shell=True) + os.chdir(cwd) + + def read_file(self, filename, stream=False): + """ + Reads the content of the given file in binary mode + + :param filename: Name of the file to read + :param stream: If True, return the file stream + :return: File content or stream + """ + for subfolder in ("java", ""): + found_file = os.path.join( + os.path.dirname(__file__), subfolder, filename + ) + if os.path.exists(found_file): + break + else: + raise IOError("File not found: {0}".format(filename)) + + if stream: + 
return open(found_file, "rb") + else: + with open(found_file, "rb") as filep: + return filep.read() + + def test_char_rw(self): + """ + Reads testChar.ser and checks the serialization process + """ + jobj = self.read_file("testChar.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read char object: %s", pobj) + self.assertEqual(pobj, b"\x00C") + + def test_chars_rw(self): + """ + Reads testChars.ser and checks the serialization process + """ + # Expected string as a UTF-16 string + expected = "python-javaobj".encode("utf-16-be") + + jobj = self.read_file("testChars.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read char objects: %s", pobj) + self.assertEqual(pobj, expected) + self.assertEqual(pobj, expected.decode("latin1")) + + def test_gzip_open(self): + """ + Tests if the GZip auto-uncompress works + """ + with java_data_fd(self.read_file("testChars.ser", stream=True)) as fd: + base = fd.read() + + with java_data_fd( + self.read_file("testChars.ser.gz", stream=True) + ) as fd: + gzipped = fd.read() + + self.assertEqual( + base, gzipped, "Uncompressed content doesn't match the original" + ) + + def test_chars_gzip(self): + """ + Reads testChars.ser.gz + """ + # Expected string as a UTF-16 string + expected = "python-javaobj".encode("utf-16-be") + + jobj = self.read_file("testChars.ser.gz") + pobj = javaobj.loads(jobj) + _logger.debug("Read char objects: %s", pobj) + self.assertEqual(pobj, expected) + self.assertEqual(pobj, expected.decode("latin1")) + + def test_double_rw(self): + """ + Reads testDouble.ser and checks the serialization process + """ + jobj = self.read_file("testDouble.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read double object: %s", pobj) + + self.assertEqual(pobj, b"\x7f\xef\xff\xff\xff\xff\xff\xff") + + def test_bytes_rw(self): + """ + Reads testBytes.ser and checks the serialization process + """ + jobj = self.read_file("testBytes.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read bytes: %s", pobj) + + 
self.assertEqual(pobj, b"HelloWorld") + + def test_class_with_byte_array_rw(self): + """ + Tests handling of classes containing a Byte Array + """ + jobj = self.read_file("testClassWithByteArray.ser") + pobj = javaobj.loads(jobj) + + # j8spencer (Google, LLC) 2018-01-16: It seems specific support for + # byte arrays was added, but is a little out-of-step with the other + # types in terms of style. This UT was broken, since the "myArray" + # member has the array stored as a tuple of ints (not a byte string) + # in member called '_data.' I've updated to pass the UTs. + self.assertEqual(pobj.myArray._data, (1, 3, 7, 11)) + + def test_boolean(self): + """ + Reads testBoolean.ser and checks the serialization process + """ + jobj = self.read_file("testBoolean.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read boolean object: %s", pobj) + + self.assertEqual(pobj, bytes_char(0)) + + def test_byte(self): + """ + Reads testByte.ser + + The result from javaobj is a single-character string. + """ + jobj = self.read_file("testByte.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read Byte: %r", pobj) + + self.assertEqual(pobj, bytes_char(127)) + + def test_fields(self): + """ + Reads a serialized object and checks its fields + """ + jobj = self.read_file("test_readFields.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read object: %s", pobj) + + self.assertEqual(pobj.aField1, u"Gabba") + self.assertEqual(pobj.aField2, None) + + classdesc = pobj.get_class() + self.assertTrue(classdesc) + self.assertEqual(classdesc.serialVersionUID, 0x7F0941F5) + self.assertEqual(classdesc.name, "OneTest$SerializableTestHelper") + + _logger.debug("Class..........: %s", classdesc) + _logger.debug(".. Flags.......: %s", classdesc.flags) + _logger.debug(".. Fields Names: %s", classdesc.fields_names) + _logger.debug(".. 
Fields Types: %s", classdesc.fields_types) + + self.assertEqual(len(classdesc.fields_names), 3) + + def test_class(self): + """ + Reads the serialized String class + """ + jobj = self.read_file("testClass.ser") + pobj = javaobj.loads(jobj) + _logger.debug("Read object: %s", pobj) + self.assertEqual(pobj.name, "java.lang.String") + + # def test_swing_object(self): + # """ + # Reads a serialized Swing component + # """ + # jobj = self.read_file("testSwingObject.ser") + # pobj = javaobj.loads(jobj) + # _logger.debug("Read object: %s", pobj) + # + # classdesc = pobj.get_class() + # _logger.debug("Class..........: %s", classdesc) + # _logger.debug(".. Fields Names: %s", classdesc.fields_names) + # _logger.debug(".. Fields Types: %s", classdesc.fields_types) + + def test_super(self): + """ + Tests basic class inheritance handling + """ + jobj = self.read_file("objSuper.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + classdesc = pobj.get_class() + _logger.debug(classdesc) + _logger.debug(classdesc.fields_names) + _logger.debug(classdesc.fields_types) + + self.assertEqual(pobj.childString, u"Child!!") + self.assertEqual(pobj.bool, True) + self.assertEqual(pobj.integer, -1) + self.assertEqual(pobj.superString, u"Super!!") + + def test_arrays(self): + """ + Tests handling of Java arrays + """ + jobj = self.read_file("objArrays.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + classdesc = pobj.get_class() + _logger.debug(classdesc) + _logger.debug(classdesc.fields_names) + _logger.debug(classdesc.fields_types) + + # public String[] stringArr = {"1", "2", "3"}; + # public int[] integerArr = {1,2,3}; + # public boolean[] boolArr = {true, false, true}; + # public TestConcrete[] concreteArr = {new TestConcrete(), + # new TestConcrete()}; + + _logger.debug(pobj.stringArr) + _logger.debug(pobj.integerArr) + _logger.debug(pobj.boolArr) + _logger.debug(pobj.concreteArr) + + def test_japan(self): + """ + Tests the UTF encoding handling with Japanese characters 
+ """ + # Japan.ser contains a string using wide characters: the name of the + # state from Japan (according to wikipedia) + jobj = self.read_file("testJapan.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + # Compare the UTF-8 encoded version of the name + self.assertEqual( + pobj, b"\xe6\x97\xa5\xe6\x9c\xac\xe5\x9b\xbd".decode("utf-8") + ) + + def test_char_array(self): + """ + Tests the loading of a wide-char array + """ + jobj = self.read_file("testCharArray.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertEqual( + pobj, + [ + u"\u0000", + u"\ud800", + u"\u0001", + u"\udc00", + u"\u0002", + u"\uffff", + u"\u0003", + ], + ) + + def test_2d_array(self): + """ + Tests the handling of a 2D array + """ + jobj = self.read_file("test2DArray.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertEqual( + pobj, [[1, 2, 3], [4, 5, 6],], + ) + + def test_class_array(self): + """ + Tests the handling of an array of Class objects + """ + jobj = self.read_file("testClassArray.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertEqual(pobj[0].name, "java.lang.Integer") + self.assertEqual(pobj[1].name, "java.io.ObjectOutputStream") + self.assertEqual(pobj[2].name, "java.lang.Exception") + + def test_enums(self): + """ + Tests the handling of "enum" types + """ + jobj = self.read_file("objEnums.ser") + pobj = javaobj.loads(jobj) + + classdesc = pobj.get_class() + _logger.debug("classdesc: {0}".format(classdesc)) + _logger.debug("fields_names: {0}".format(classdesc.fields_names)) + _logger.debug("fields_types: {0}".format(classdesc.fields_types)) + + self.assertEqual(classdesc.name, "ClassWithEnum") + self.assertEqual(pobj.color.classdesc.name, "Color") + self.assertEqual(pobj.color.constant, u"GREEN") + + for color, intended in zip(pobj.colors, (u"GREEN", u"BLUE", u"RED")): + _logger.debug("color: {0} - {1}".format(color, type(color))) + self.assertEqual(color.classdesc.name, "Color") + 
self.assertEqual(color.constant, intended) + + def test_sets(self): + """ + Tests handling of HashSet and TreeSet + """ + for filename in ( + "testHashSet.ser", + "testTreeSet.ser", + "testLinkedHashSet.ser", + ): + _logger.debug("Loading file: %s", filename) + jobj = self.read_file(filename) + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertIsInstance(pobj, set) + self.assertSetEqual({i.value for i in pobj}, {1, 2, 42}) + + def test_times(self): + """ + Tests the handling of java.time classes + """ + jobj = self.read_file("testTime.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + # First one is a duration of 10s + duration = pobj[0] + self.assertEqual(duration.second, 10) + + # Check types + self.assertIsInstance(pobj, javaobj.beans.JavaArray) + for obj in pobj: + self.assertIsInstance(obj, javaobj.transformers.JavaTime) + + # def test_exception(self): + # jobj = self.read_file("objException.ser") + # pobj = javaobj.loads(jobj) + # _logger.debug(pobj) + # + # classdesc = pobj.get_class() + # _logger.debug(classdesc) + # _logger.debug(classdesc.fields_names) + # _logger.debug(classdesc.fields_types) + # + # # TODO: add some tests + # self.assertEqual(classdesc.name, "MyExceptionWhenDumping") + + def test_sun_example(self): + content = javaobj.load(self.read_file("sunExample.ser", stream=True)) + + pobj = content[0] + self.assertEqual(pobj.value, 17) + self.assertTrue(pobj.next) + + pobj = content[1] + self.assertEqual(pobj.value, 19) + self.assertFalse(pobj.next) + + def test_collections(self): + """ + Tests the handling of ArrayList, LinkedList and HashMap + """ + jobj = self.read_file("objCollections.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + _logger.debug("arrayList: %s", pobj.arrayList) + self.assertTrue(isinstance(pobj.arrayList, list)) + _logger.debug("hashMap: %s", pobj.hashMap) + self.assertTrue(isinstance(pobj.hashMap, dict)) + _logger.debug("linkedList: %s", pobj.linkedList) + 
self.assertTrue(isinstance(pobj.linkedList, list)) + + # FIXME: referencing problems with the collection class + + def test_jceks_issue_5(self): + """ + Tests the handling of JCEKS issue #5 + """ + jobj = self.read_file("jceks_issue_5.ser") + pobj = javaobj.loads(jobj) + _logger.info(pobj) + + def test_qistoph_pr_27(self): + """ + Tests support for Bool, Integer, Long classes (PR #27) + """ + # Load the basic map + jobj = self.read_file("testBoolIntLong.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + + # Basic checking + self.assertEqual(pobj[u"key1"], u"value1") + self.assertEqual(pobj[u"key2"], u"value2") + self.assertEqual(pobj[u"int"], 9) + self.assertEqual(pobj[u"int2"], 10) + self.assertEqual(pobj[u"bool"], True) + self.assertEqual(pobj[u"bool2"], True) + + # Load the parent map + jobj2 = self.read_file("testBoolIntLong-2.ser") + pobj2 = javaobj.loads(jobj2) + _logger.debug(pobj2) + + parent_map = pobj2[u"subMap"] + for key, value in pobj.items(): + self.assertEqual(parent_map[key], value) + + def test_writeObject(self): + """ + Tests support for custom writeObject (PR #38) + """ + + ser = self.read_file("testCustomWriteObject.ser") + transformers = [ + CustomWriterTransformer(), + RandomChildTransformer(), + JavaRandomTransformer(), + ] + pobj = javaobj.loads(ser, *transformers) + + self.assertEqual(isinstance(pobj, CustomWriterInstance), True) + self.assertEqual( + isinstance(pobj.field_data["custom_obj"], RandomChildInstance), + True, + ) + + parent_data = pobj.field_data + child_data = parent_data["custom_obj"].field_data + super_data = parent_data["custom_obj"].annotations + expected = { + "int_not_in_fields": 0, + "custom_obj": { + "field_data": {"doub": 4.5, "num": 1}, + "annotations": { + "haveNextNextGaussian": False, + "nextNextGaussian": 0.0, + "seed": 25214903879, + }, + }, + } + + self.assertEqual( + expected["int_not_in_fields"], parent_data["int_not_in_fields"] + ) + self.assertEqual(expected["custom_obj"]["field_data"], child_data) 
+ self.assertEqual(expected["custom_obj"]["annotations"], super_data) + + +# ------------------------------------------------------------------------------ + + +if __name__ == "__main__": + # Setup logging + logging.basicConfig(level=logging.INFO) + + # Run tests + unittest.main()