diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..a0c19b2
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,5 @@
+[run]
+source = javaobj/
+
+[report]
+include = javaobj/*
diff --git a/.coveralls.yml b/.coveralls.yml
deleted file mode 100644
index 9160059..0000000
--- a/.coveralls.yml
+++ /dev/null
@@ -1 +0,0 @@
-service_name: travis-ci
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..b1493ad
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,17 @@
+root=true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+indent_style = space
+trim_trailing_whitespace = true
+
+[*.py]
+indent_size = 4
+
+[*.rst]
+indent_size = 3
+
+[*.{yml,yaml,toml}]
+indent_size = 2
diff --git a/.github/workflows/build-20.04.yml b/.github/workflows/build-20.04.yml
new file mode 100644
index 0000000..2d8a2bc
--- /dev/null
+++ b/.github/workflows/build-20.04.yml
@@ -0,0 +1,49 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI Build - Python 3.5-3.7
+
+on:
+ push:
+ branches: [ "master" ]
+ tags: '**'
+ pull_request:
+ branches: [ "master" ]
+
+jobs:
+ build:
+ timeout-minutes: 10
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.5", "3.6", "3.7"]
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ env:
+ PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install flake8 pytest coverage
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+ - name: Lint with flake8
+ run: |
+ # stop the build if there are Python syntax errors or undefined names
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+ - name: Test
+ run: |
+ coverage run -m pytest
+ - name: Coveralls
+ env:
+ COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
+ run: |
+ pip install coveralls
+ coveralls
diff --git a/.github/workflows/build-24.04.yml b/.github/workflows/build-24.04.yml
new file mode 100644
index 0000000..4c25cf3
--- /dev/null
+++ b/.github/workflows/build-24.04.yml
@@ -0,0 +1,47 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI Build - Python 3.8+
+
+on:
+ push:
+ branches: [ "master" ]
+ tags: '**'
+ pull_request:
+ branches: [ "master" ]
+
+jobs:
+ build:
+ timeout-minutes: 10
+ runs-on: ubuntu-24.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14-dev"]
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install flake8 pytest coverage
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+ - name: Lint with flake8
+ run: |
+ # stop the build if there are Python syntax errors or undefined names
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+ - name: Test
+ run: |
+ coverage run -m pytest
+ - name: Coveralls
+ env:
+ COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
+ run: |
+ pip install coveralls
+ coveralls
diff --git a/.gitignore b/.gitignore
index 5ffbefd..9711698 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
*.so
# Packages
+.eggs/
*.egg
*.egg-info
dist
@@ -37,3 +38,11 @@ nosetests.xml
.idea/
.vscode/
.*cache/
+
+# Log files
+*.log
+
+# Folders and scripts used to reproduce issues
+/issue*/
+/repro*.py
+/test*.py
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 4d82b77..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-language: python
-python:
- - "2.7"
- - "3.4"
- - "3.5"
- - "3.6"
-
-sudo: false
-
-install:
- - pip install nose coverage coveralls
- - pip install pytest>=2.7.3 --upgrade
-
-script:
- - nosetests -v --with-coverage --cover-package=javaobj tests
-
-after_success:
- - coveralls
diff --git a/AUTHORS b/AUTHORS
index aa70b71..bbc1a99 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -9,3 +9,7 @@ Many thanks to the contributors:
* @voetsjoeba
* Vadim Markovtsev (@vmarkovtsev)
* Jason Spencer, Google LLC (@j8spencer)
+* @guywithface
+* Chris van Marle (@qistoph)
+* Federico Alves (@UruDev)
+* @sarimak
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4385a0b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,482 @@
+# javaobj-py3
+
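+[![Latest Version](https://img.shields.io/pypi/v/javaobj-py3.svg)](https://pypi.python.org/pypi/javaobj-py3/)
+[![License](https://img.shields.io/pypi/l/javaobj-py3.svg)](https://pypi.python.org/pypi/javaobj-py3/)
+[![CI Build](https://github.com/tcalmant/python-javaobj/actions/workflows/build-24.04.yml/badge.svg?branch=master)](https://github.com/tcalmant/python-javaobj/actions/workflows/build-24.04.yml)
+[![Coveralls status](https://coveralls.io/repos/tcalmant/python-javaobj/badge.svg?branch=master)](https://coveralls.io/r/tcalmant/python-javaobj?branch=master)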
+
+*python-javaobj* is a python library that provides functions for reading and
+writing (writing is WIP currently) Java objects serialized by
+`ObjectOutputStream` or to be deserialized by `ObjectInputStream`. This form of
+object representation is a standard data interchange format in the Java world.
+
+The `javaobj` module exposes an API familiar to users of the standard library
+`marshal`, `pickle` and `json` modules.
+
+## About this repository
+
+This project is a fork of *python-javaobj* by Volodymyr Buell, originally from
+[Google Code](http://code.google.com/p/python-javaobj/) and now hosted on
+[GitHub](https://github.com/vbuell/python-javaobj).
+
+This fork intends to work both on Python 2.7 and Python 3.4+.
+
+## Compatibility Warnings
+
+### New implementation of the parser
+
+| Implementations | Version |
+|-----------------|----------|
+| `v1`, `v2` | `0.4.0+` |
+
+Since version 0.4.0, two implementations of the parser are available:
+
+* `v1`: the *classic* implementation of `javaobj`, with a work in progress
+ implementation of a writer.
+* `v2`: the *new* implementation, which is a port of the Java project
+ [`jdeserialize`](https://github.com/frohoff/jdeserialize/),
+ with support of the object transformer (with a new API) and of the `numpy`
+ arrays loading.
+
+You can use the `v1` parser to ensure that the behaviour of your scripts
+doesn't change and to keep the ability to write (marshal) files.
+
+You can use the `v2` parser for new developments
+*which won't require marshalling* and as a *fallback* if the `v1`
+fails to parse a file.
+
+### Object transformers V1
+
+| Implementations | Version |
+|-----------------|----------|
+| `v1` | `0.2.0+` |
+
+As of version 0.2.0, the notion of *object transformer* from the original
+project has been replaced by an *object creator*.
+
+The *object creator* is called before the deserialization.
+This allows storing the reference of the converted object before deserializing
+it, and avoids a mismatch between the referenced object and the transformed one.
+
+### Object transformers V2
+
+| Implementations | Version |
+|-----------------|----------|
+| `v2` | `0.4.0+` |
+
+The `v2` implementation provides a new API for the object transformers.
+Please look at the *Usage (V2)* section in this file.
+
+### Bytes arrays
+
+| Implementations | Version |
+|-----------------|----------|
+| `v1` | `0.2.3+` |
+
+As of version 0.2.3, bytes arrays are loaded as a `bytes` object instead of
+an array of integers.
+
+### Custom Transformer
+
+| Implementations | Version |
+|-----------------|----------|
+| `v2` | `0.4.2+` |
+
+A new transformer API has been proposed to handle objects written with a custom
+Java writer.
+You can find a sample usage in the *Custom Transformer* section in this file.
+
+## Features
+
+* Java object instance un-marshalling
+* Java classes un-marshalling
+* Primitive values un-marshalling
+* Automatic conversion of Java Collections to python ones
+ (`HashMap` => `dict`, `ArrayList` => `list`, etc.)
+* Basic marshalling of simple Java objects (`v1` implementation only)
+* Automatically uncompresses GZipped files
+
+## Requirements
+
+* Python >= 2.7 or Python >= 3.4
+* `enum34` and `typing` when using Python <= 3.4 (installable with `pip`)
+* Maven 2+ (for building test data of serialized objects.
+ You can skip it if you do not plan to run `tests.py`)
+
+## Usage (V1 implementation)
+
+Un-marshalling of Java serialised object:
+
+```python
+import javaobj
+
+with open("obj5.ser", "rb") as fd:
+ jobj = fd.read()
+
+pobj = javaobj.loads(jobj)
+print(pobj)
+```
+
+Or, you can use `JavaObjectUnmarshaller` object directly:
+
+```python
+import javaobj
+
+with open("objCollections.ser", "rb") as fd:
+ marshaller = javaobj.JavaObjectUnmarshaller(fd)
+ pobj = marshaller.readObject()
+
+ print(pobj.value, "should be", 17)
+ print(pobj.next, "should be", True)
+
+ pobj = marshaller.readObject()
+```
+
+**Note:** The objects and methods provided by the `javaobj` module are shortcuts
+to the `javaobj.v1` package, for compatibility purposes.
+It is **recommended** to explicitly import methods and classes from the `v1`
+(or `v2`) package when writing new code, in order to be sure that your code
+won't need import updates in the future.
+
+
+## Usage (V2 implementation)
+
+The following methods are provided by the `javaobj.v2` package:
+
+* `load(fd, *transformers, use_numpy_arrays=False)`:
+ Parses the content of the given file descriptor, opened in binary mode (`rb`).
+ The method accepts a list of custom object transformers. The default object
+ transformer is always added to the list.
+
+ The `use_numpy_arrays` flag indicates that the arrays of primitive type
+ elements must be loaded using `numpy` (if available) instead of using the
+ standard parsing technique.
+
+* `loads(bytes, *transformers, use_numpy_arrays=False)`:
+ This is a shortcut to the `load()` method, providing it the binary data
+ using a `BytesIO` object.
+
+**Note:** The V2 parser doesn't have the marshalling capability.
+
+Sample usage:
+
+```python
+import javaobj.v2 as javaobj
+
+with open("obj5.ser", "rb") as fd:
+ pobj = javaobj.load(fd)
+
+print(pobj.dump())
+```
+
+### Object Transformer
+
+An object transformer can be called during the parsing of a Java object
+instance or while loading an array.
+
+The Java object instance parsing works in two main steps:
+
+1. The transformer is called to create an instance of a bean that inherits
+ `JavaInstance`.
+1. The latter bean is then called:
+
+ * When the object is written with a custom block data
+ * After the fields and annotations have been parsed, to update the content
+ of the Python bean.
+
+Here is an example for a Java `HashMap` object. You can look at the code of
+the `javaobj.v2.transformer` module to see the whole implementation.
+
+```python
+class JavaMap(dict, javaobj.v2.beans.JavaInstance):
+ """
+ Inherits from dict for Python usage, JavaInstance for parsing purpose
+ """
+ def __init__(self):
+ # Don't forget to call both constructors
+ dict.__init__(self)
+ JavaInstance.__init__(self)
+
+ def load_from_blockdata(self, parser, reader, indent=0):
+ """
+ Reads content stored in a block data.
+
+ This method is called only if the class description has both the
+ `SC_EXTERNALIZABLE` and `SC_BLOCK_DATA` flags set.
+
+ The stream parsing will stop and fail if this method returns False.
+
+ :param parser: The JavaStreamParser in use
+ :param reader: The underlying data stream reader
+ :param indent: Indentation to use in logs
+ :return: True on success, False on error
+ """
+ # This kind of class is not supposed to have the SC_BLOCK_DATA flag set
+ return False
+
+ def load_from_instance(self, indent=0):
+ # type: (int) -> bool
+ """
+ Load content from the parsed instance object.
+
+ This method is called after the block data (if any), the fields and
+ the annotations have been loaded.
+
+ :param indent: Indentation to use while logging
+ :return: True on success (currently ignored)
+ """
+ # Maps have their content in their annotations
+ for cd, annotations in self.annotations.items():
+ # Annotations are associated to their definition class
+ if cd.name == "java.util.HashMap":
+ # We are in the annotation created by the handled class
+ # Group annotation elements 2 by 2
+ # (storage is: key, value, key, value, ...)
+ args = [iter(annotations[1:])] * 2
+ for key, value in zip(*args):
+ self[key] = value
+
+ # Job done
+ return True
+
+ # Couldn't load the data
+ return False
+
+class MapObjectTransformer(javaobj.v2.api.ObjectTransformer):
+ """
+ Creates a JavaInstance object with custom loading methods for the
+ classes it can handle
+ """
+ def create_instance(self, classdesc):
+ # type: (JavaClassDesc) -> Optional[JavaInstance]
+ """
+ Transforms a parsed Java object into a Python object
+
+ :param classdesc: The description of a Java class
+ :return: The Python form of the object, or the original JavaObject
+ """
+ if classdesc.name == "java.util.HashMap":
+ # We can handle this class description
+ return JavaMap()
+ else:
+ # Return None if the class is not handled
+ return None
+```
+
+### Custom Object Transformer
+
+The custom transformer is called when the class is not handled by the default
+object transformer.
+A custom object transformer still inherits from the `ObjectTransformer` class,
+but it also implements the `load_custom_writeObject` method.
+
+The sample given here is used in the unit tests.
+
+#### Java sample
+
+On the Java side, we create various classes and write them as we wish:
+
+```java
+class CustomClass implements Serializable {
+
+ private static final long serialVersionUID = 1;
+
+ public void start(ObjectOutputStream out) throws Exception {
+ this.writeObject(out);
+ }
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ CustomWriter custom = new CustomWriter(42);
+ out.writeObject(custom);
+ out.flush();
+ }
+}
+
+class RandomChild extends Random {
+
+ private static final long serialVersionUID = 1;
+ private int num = 1;
+ private double doub = 4.5;
+
+ RandomChild(int seed) {
+ super(seed);
+ }
+}
+
+class CustomWriter implements Serializable {
+ protected RandomChild custom_obj;
+
+ CustomWriter(int seed) {
+ custom_obj = new RandomChild(seed);
+ }
+
+ private static final long serialVersionUID = 1;
+ private static final int CURRENT_SERIAL_VERSION = 0;
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ out.writeInt(CURRENT_SERIAL_VERSION);
+ out.writeObject(custom_obj);
+ }
+}
+```
+
+And here is a sample of writing that kind of object:
+
+```java
+ObjectOutputStream oos = new ObjectOutputStream(
+ new FileOutputStream("custom_objects.ser"));
+CustomClass writer = new CustomClass();
+writer.start(oos);
+oos.flush();
+oos.close();
+```
+
+#### Python sample
+
+On the Python side, the first step is to define the custom transformers.
+They are children of the `javaobj.v2.transformers.ObjectTransformer` class.
+
+```python
+class BaseTransformer(javaobj.v2.transformers.ObjectTransformer):
+ """
+ Creates a JavaInstance object with custom loading methods for the
+ classes it can handle
+ """
+
+ def __init__(self, handled_classes=None):
+ self.instance = None
+ self.handled_classes = handled_classes or {}
+
+ def create_instance(self, classdesc):
+ """
+ Transforms a parsed Java object into a Python object
+
+ :param classdesc: The description of a Java class
+ :return: The Python form of the object, or the original JavaObject
+ """
+ if classdesc.name in self.handled_classes:
+ self.instance = self.handled_classes[classdesc.name]()
+ return self.instance
+
+ return None
+
+class RandomChildTransformer(BaseTransformer):
+ def __init__(self):
+ super(RandomChildTransformer, self).__init__(
+ {"RandomChild": RandomChildInstance}
+ )
+
+class CustomWriterTransformer(BaseTransformer):
+ def __init__(self):
+ super(CustomWriterTransformer, self).__init__(
+ {"CustomWriter": CustomWriterInstance}
+ )
+
+class JavaRandomTransformer(BaseTransformer):
+ def __init__(self):
+ super(JavaRandomTransformer, self).__init__()
+ self.name = "java.util.Random"
+ self.field_names = ["haveNextNextGaussian", "nextNextGaussian", "seed"]
+ self.field_types = [
+ javaobj.v2.beans.FieldType.BOOLEAN,
+ javaobj.v2.beans.FieldType.DOUBLE,
+ javaobj.v2.beans.FieldType.LONG,
+ ]
+
+ def load_custom_writeObject(self, parser, reader, name):
+ if name != self.name:
+ return None
+
+ fields = []
+ values = []
+ for f_name, f_type in zip(self.field_names, self.field_types):
+ values.append(parser._read_field_value(f_type))
+ fields.append(javaobj.beans.JavaField(f_type, f_name))
+
+ class_desc = javaobj.beans.JavaClassDesc(
+ javaobj.beans.ClassDescType.NORMALCLASS
+ )
+ class_desc.name = self.name
+ class_desc.desc_flags = javaobj.beans.ClassDataType.EXTERNAL_CONTENTS
+ class_desc.fields = fields
+ class_desc.field_data = values
+ return class_desc
+```
+
+Second step is defining the representation of the instances, where the real
+object loading occurs. Those classes inherit from
+`javaobj.v2.beans.JavaInstance`.
+
+```python
+class CustomWriterInstance(javaobj.v2.beans.JavaInstance):
+ def __init__(self):
+ javaobj.v2.beans.JavaInstance.__init__(self)
+
+ def load_from_instance(self):
+ """
+ Updates the content of this instance
+ from its parsed fields and annotations
+ :return: True on success, False on error
+ """
+ if self.classdesc and self.classdesc in self.annotations:
+ # Here, we known there is something written before the fields,
+ # even if it's not declared in the class description
+ fields = ["int_not_in_fields"] + self.classdesc.fields_names
+ raw_data = self.annotations[self.classdesc]
+ int_not_in_fields = struct.unpack(
+ ">i", BytesIO(raw_data[0].data).read(4)
+ )[0]
+ custom_obj = raw_data[1]
+ values = [int_not_in_fields, custom_obj]
+ self.field_data = dict(zip(fields, values))
+ return True
+
+ return False
+
+
+class RandomChildInstance(javaobj.v2.beans.JavaInstance):
+ def load_from_instance(self):
+ """
+ Updates the content of this instance
+ from its parsed fields and annotations
+ :return: True on success, False on error
+ """
+ if self.classdesc and self.classdesc in self.field_data:
+ fields = self.classdesc.fields_names
+ values = [
+ self.field_data[self.classdesc][self.classdesc.fields[i]]
+ for i in range(len(fields))
+ ]
+ self.field_data = dict(zip(fields, values))
+ if (
+ self.classdesc.super_class
+ and self.classdesc.super_class in self.annotations
+ ):
+ super_class = self.annotations[self.classdesc.super_class][0]
+ self.annotations = dict(
+ zip(super_class.fields_names, super_class.field_data)
+ )
+ return True
+
+ return False
+```
+
+Finally we can use the transformers in the loading process.
+Note that even if it is not explicitly given, the `DefaultObjectTransformer`
+will also be used, as it is added automatically by `javaobj` if it is
+missing from the given list.
+
+```python
+# Load the object using those transformers
+transformers = [
+ CustomWriterTransformer(),
+ RandomChildTransformer(),
+ JavaRandomTransformer()
+]
+pobj = javaobj.loads("custom_objects.ser", *transformers)
+
+# Here we show a field that isn't visible from the class description
+# The field belongs to the class but it's not serialized by default because
+# it's static. See: https://stackoverflow.com/a/16477421/12621168
+print(pobj.field_data["int_not_in_fields"])
+```
diff --git a/README.rst b/README.rst
deleted file mode 100644
index 5e2a936..0000000
--- a/README.rst
+++ /dev/null
@@ -1,97 +0,0 @@
-javaobj-py3
-###########
-
-.. image:: https://img.shields.io/pypi/v/javaobj-py3.svg
- :target: https://pypi.python.org/pypi/javaobj-py3/
- :alt: Latest Version
-
-.. image:: https://img.shields.io/pypi/l/javaobj-py3.svg
- :target: https://pypi.python.org/pypi/javaobj-py3/
- :alt: License
-
-.. image:: https://travis-ci.org/tcalmant/python-javaobj.svg?branch=master
- :target: https://travis-ci.org/tcalmant/python-javaobj
- :alt: Travis-CI status
-
-.. image:: https://coveralls.io/repos/tcalmant/python-javaobj/badge.svg?branch=master
- :target: https://coveralls.io/r/tcalmant/python-javaobj?branch=master
- :alt: Coveralls status
-
-*python-javaobj* is a python library that provides functions for reading and
-writing (writing is WIP currently) Java objects serialized or will be
-deserialized by ``ObjectOutputStream``. This form of object representation is a
-standard data interchange format in Java world.
-
-The ``javaobj`` module exposes an API familiar to users of the standard library
-``marshal``, ``pickle`` and ``json`` modules.
-
-About this repository
-=====================
-
-This project is a fork of *python-javaobj* by Volodymyr Buell, originally from
-`Google Code `_ and now hosted on
-`GitHub `_.
-
-This fork intends to work both on Python 2.7 and Python 3.4+.
-
-Compatibility Warning: object transformer
------------------------------------------
-
-As of version 0.2.0, the notion of *object transformer* from the original
-project as been replaced by an *object creator*.
-
-The *object creator* is called before the deserialization.
-This allows to store the reference of the converted object before deserializing
-it, and avoids a mismatch between the referenced object and the transformed one.
-
-
-Compatibility Warning: bytes arrays
------------------------------------
-
-As of version 0.2.3, bytes arrays are loaded as a ``bytes`` object instead of
-an array of integers.
-
-
-Features
-========
-
-* Java object instance unmarshaling
-* Java classes unmarshaling
-* Primitive values unmarshaling
-* Automatic conversion of Java Collections to python ones
- (``HashMap`` => ``dict``, ``ArrayList`` => ``list``, etc.)
-* Basic marshalling of simple Java objects
-
-Requirements
-============
-
-* Python >= 2.7 or Python >= 3.4
-* Maven 2+ (for building test data of serialized objects.
- You can skip it if you do not plan to run ``tests.py``)
-
-Usage
-=====
-
-Unmarshalling of Java serialised object:
-
-.. code-block:: python
-
- import javaobj
-
- jobj = self.read_file("obj5.ser")
- pobj = javaobj.loads(jobj)
- print(pobj)
-
-Or, you can use Unmarshaller object directly:
-
-.. code-block:: python
-
- import javaobj
-
- marshaller = javaobj.JavaObjectUnmarshaller(open("objCollections.ser"))
- pobj = marshaller.readObject()
-
- self.assertEqual(pobj.value, 17)
- self.assertTrue(pobj.next)
-
- pobj = marshaller.readObject()
diff --git a/javaobj.py b/javaobj.py
deleted file mode 100644
index 2311ad3..0000000
--- a/javaobj.py
+++ /dev/null
@@ -1,1681 +0,0 @@
-#!/usr/bin/python
-# -- Content-Encoding: UTF-8 --
-"""
-Provides functions for reading and writing (writing is WIP currently) Java
-objects serialized or will be deserialized by ObjectOutputStream. This form of
-object representation is a standard data interchange format in Java world.
-
-javaobj module exposes an API familiar to users of the standard library
-marshal, pickle and json modules.
-
-See:
-http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
-
-:authors: Volodymyr Buell, Thomas Calmant
-:license: Apache License 2.0
-:version: 0.2.3
-:status: Alpha
-
-..
-
- Copyright 2016 Thomas Calmant
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-# Standard library
-import collections
-import logging
-import os
-import struct
-import sys
-
-try:
- # Python 2
- from StringIO import StringIO as BytesIO
-except ImportError:
- # Python 3+
- from io import BytesIO
-
-# ------------------------------------------------------------------------------
-
-# Module version
-__version_info__ = (0, 2, 3)
-__version__ = ".".join(str(x) for x in __version_info__)
-
-# Documentation strings format
-__docformat__ = "restructuredtext en"
-
-# ------------------------------------------------------------------------------
-
-# Setup the logger
-_log = logging.getLogger(__name__)
-
-
-def log_debug(message, ident=0):
- """
- Logs a message at debug level
-
- :param message: Message to log
- :param ident: Number of indentation spaces
- """
- _log.debug(" " * (ident * 2) + str(message))
-
-
-def log_error(message, ident=0):
- """
- Logs a message at error level
-
- :param message: Message to log
- :param ident: Number of indentation spaces
- """
- _log.error(" " * (ident * 2) + str(message))
-
-# ------------------------------------------------------------------------------
-
-if sys.version_info[0] >= 3:
- # Python 3 interpreter : bytes & str
- def to_bytes(data, encoding="UTF-8"):
- """
- Converts the given string to an array of bytes.
- Returns the first parameter if it is already an array of bytes.
-
- :param data: A unicode string
- :param encoding: The encoding of data
- :return: The corresponding array of bytes
- """
- if type(data) is bytes:
- # Nothing to do
- return data
- return data.encode(encoding)
-
- def to_str(data, encoding="UTF-8"):
- """
- Converts the given parameter to a string.
- Returns the first parameter if it is already an instance of ``str``.
-
- :param data: A string
- :param encoding: The encoding of data
- :return: The corresponding string
- """
- if type(data) is str:
- # Nothing to do
- return data
- return str(data, encoding)
-
- def read_to_str(data):
- """
- Concats all bytes into a string
- """
- return ''.join(chr(char) for char in data)
-
-else:
- # Python 2 interpreter : str & unicode
- def to_str(data, encoding="UTF-8"):
- """
- Converts the given parameter to a string.
- Returns the first parameter if it is already an instance of ``str``.
-
- :param data: A string
- :param encoding: The encoding of data
- :return: The corresponding string
- """
- if type(data) is str:
- # Nothing to do
- return data
- return data.encode(encoding)
-
- # Same operation
- to_bytes = to_str
-
- def read_to_str(data):
- """
- Nothing to do in Python 2
- """
- return data
-
-# ------------------------------------------------------------------------------
-
-
-def load(file_object, *transformers, **kwargs):
- """
- Deserializes Java primitive data and objects serialized using
- ObjectOutputStream from a file-like object.
-
- :param file_object: A file-like object
- :param transformers: Custom transformers to use
- :param ignore_remaining_data: If True, don't log an error when unused
- trailing bytes are remaining
- :return: The deserialized object
- """
- # Read keyword argument
- ignore_remaining_data = kwargs.get('ignore_remaining_data', False)
-
- marshaller = JavaObjectUnmarshaller(
- file_object, kwargs.get('use_numpy_arrays', False))
-
- # Add custom transformers first
- for transformer in transformers:
- marshaller.add_transformer(transformer)
- marshaller.add_transformer(DefaultObjectTransformer())
-
- # Read the file object
- return marshaller.readObject(ignore_remaining_data=ignore_remaining_data)
-
-
-def loads(string, *transformers, **kwargs):
- """
- Deserializes Java objects and primitive data serialized using
- ObjectOutputStream from a string.
-
- :param string: A Java data string
- :param transformers: Custom transformers to use
- :param ignore_remaining_data: If True, don't log an error when unused
- trailing bytes are remaining
- :return: The deserialized object
- """
- # Read keyword argument
- ignore_remaining_data = kwargs.get('ignore_remaining_data', False)
-
- # Reuse the load method (avoid code duplication)
- return load(BytesIO(string), *transformers,
- ignore_remaining_data=ignore_remaining_data)
-
-
-def dumps(obj, *transformers):
- """
- Serializes Java primitive data and objects unmarshaled by load(s) before
- into string.
-
- :param obj: A Python primitive object, or one loaded using load(s)
- :param transformers: Custom transformers to use
- :return: The serialized data as a string
- """
- marshaller = JavaObjectMarshaller()
- # Add custom transformers
- for transformer in transformers:
- marshaller.add_transformer(transformer)
-
- return marshaller.dump(obj)
-
-# ------------------------------------------------------------------------------
-
-
-class JavaClass(object):
- """
- Represents a class in the Java world
- """
- def __init__(self):
- """
- Sets up members
- """
- self.name = None
- self.serialVersionUID = None
- self.flags = None
- self.fields_names = []
- self.fields_types = []
- self.superclass = None
-
- def __str__(self):
- """
- String representation of the Java class
- """
- return self.__repr__()
-
- def __repr__(self):
- """
- String representation of the Java class
- """
- return "[{0:s}:0x{1:X}]".format(self.name, self.serialVersionUID)
-
- def __eq__(self, other):
- """
- Equality test between two Java classes
-
- :param other: Other JavaClass to test
- :return: True if both classes share the same fields and name
- """
- if not isinstance(other, type(self)):
- return False
-
- return (self.name == other.name and
- self.serialVersionUID == other.serialVersionUID and
- self.flags == other.flags and
- self.fields_names == other.fields_names and
- self.fields_types == other.fields_types and
- self.superclass == other.superclass)
-
-
-class JavaObject(object):
- """
- Represents a deserialized non-primitive Java object
- """
- def __init__(self):
- """
- Sets up members
- """
- self.classdesc = None
- self.annotations = []
-
- def get_class(self):
- """
- Returns the JavaClass that defines the type of this object
- """
- return self.classdesc
-
- def __str__(self):
- """
- String representation
- """
- return self.__repr__()
-
- def __repr__(self):
- """
- String representation
- """
- name = "UNKNOWN"
- if self.classdesc:
- name = self.classdesc.name
- return "".format(name)
-
- def __eq__(self, other):
- """
- Equality test between two Java classes
-
- :param other: Other JavaClass to test
- :return: True if both classes share the same fields and name
- """
- if not isinstance(other, type(self)):
- return False
-
- res = (self.classdesc == other.classdesc and
- self.annotations == other.annotations)
- if not res:
- return False
-
- for name in self.classdesc.fields_names:
- if not getattr(self, name) == getattr(other, name):
- return False
- return True
-
-
-class JavaString(str):
- """
- Represents a Java String
- """
- def __hash__(self):
- return str.__hash__(self)
-
- def __eq__(self, other):
- if not isinstance(other, str):
- return False
- return str.__eq__(self, other)
-
-
-class JavaEnum(JavaObject):
- """
- Represents a Java enumeration
- """
- def __init__(self, constant=None):
- super(JavaEnum, self).__init__()
- self.constant = constant
-
-
-class JavaArray(list, JavaObject):
- """
- Represents a Java Array
- """
- def __init__(self, classdesc=None):
- list.__init__(self)
- JavaObject.__init__(self)
- self.classdesc = classdesc
-
-
-class JavaByteArray(JavaObject):
- """
- Represents the special case of Java Array which contains bytes
- """
- def __init__(self, data, classdesc=None):
- JavaObject.__init__(self)
- self._data = struct.unpack("b" * len(data), data)
- self.classdesc = classdesc
-
- def __str__(self):
- return "JavaByteArray({0})".format(self._data)
-
- def __getitem__(self, item):
- return self._data[item]
-
- def __iter__(self):
- return iter(self._data)
-
- def __len__(self):
- return len(self._data)
-
-# ------------------------------------------------------------------------------
-
-
-class JavaObjectConstants(object):
- """
- Defines the constants of the Java serialization format
- """
- STREAM_MAGIC = 0xaced
- STREAM_VERSION = 0x05
-
- TC_NULL = 0x70
- TC_REFERENCE = 0x71
- TC_CLASSDESC = 0x72
- TC_OBJECT = 0x73
- TC_STRING = 0x74
- TC_ARRAY = 0x75
- TC_CLASS = 0x76
- TC_BLOCKDATA = 0x77
- TC_ENDBLOCKDATA = 0x78
- TC_RESET = 0x79
- TC_BLOCKDATALONG = 0x7A
- TC_EXCEPTION = 0x7B
- TC_LONGSTRING = 0x7C
- TC_PROXYCLASSDESC = 0x7D
- TC_ENUM = 0x7E
- # Ignore TC_MAX: we don't use it and it messes with TC_ENUM
- # TC_MAX = 0x7E
-
- # classDescFlags
- SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE
- SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE
- SC_SERIALIZABLE = 0x02
- SC_EXTERNALIZABLE = 0x04
- SC_ENUM = 0x10
-
- # type definition chars (typecode)
- TYPE_BYTE = 'B' # 0x42
- TYPE_CHAR = 'C' # 0x43
- TYPE_DOUBLE = 'D' # 0x44
- TYPE_FLOAT = 'F' # 0x46
- TYPE_INTEGER = 'I' # 0x49
- TYPE_LONG = 'J' # 0x4A
- TYPE_SHORT = 'S' # 0x53
- TYPE_BOOLEAN = 'Z' # 0x5A
- TYPE_OBJECT = 'L' # 0x4C
- TYPE_ARRAY = '[' # 0x5B
-
- # list of supported typecodes listed above
- TYPECODES_LIST = [
- # primitive types
- TYPE_BYTE,
- TYPE_CHAR,
- TYPE_DOUBLE,
- TYPE_FLOAT,
- TYPE_INTEGER,
- TYPE_LONG,
- TYPE_SHORT,
- TYPE_BOOLEAN,
- # object types
- TYPE_OBJECT,
- TYPE_ARRAY]
-
- BASE_REFERENCE_IDX = 0x7E0000
-
- NUMPY_TYPE_MAP = {
- TYPE_BYTE: 'B',
- TYPE_CHAR: 'b',
- TYPE_DOUBLE: '>d',
- TYPE_FLOAT: '>f',
- TYPE_INTEGER: '>i',
- TYPE_LONG: '>l',
- TYPE_SHORT: '>h',
- TYPE_BOOLEAN: '>B'
- }
-
-
-class OpCodeDebug(object):
- """
- OP Codes definition and utility methods
- """
- # Type codes
- OP_CODE = dict((getattr(JavaObjectConstants, key), key)
- for key in dir(JavaObjectConstants)
- if key.startswith("TC_"))
-
- TYPE = dict((getattr(JavaObjectConstants, key), key)
- for key in dir(JavaObjectConstants)
- if key.startswith("TYPE_"))
-
- STREAM_CONSTANT = dict((getattr(JavaObjectConstants, key), key)
- for key in dir(JavaObjectConstants)
- if key.startswith("SC_"))
-
- @staticmethod
- def op_id(op_id):
- """
- Returns the name of the given OP Code
- :param op_id: OP Code
- :return: Name of the OP Code
- """
- return OpCodeDebug.OP_CODE.get(
- op_id, "".format(op_id))
-
- @staticmethod
- def type_code(type_id):
- """
- Returns the name of the given Type Code
- :param type_id: Type code
- :return: Name of the type code
- """
- return OpCodeDebug.TYPE.get(
- type_id, "".format(type_id))
-
- @staticmethod
- def flags(flags):
- """
- Returns the names of the class description flags found in the given
- integer
-
- :param flags: A class description flag entry
- :return: The flags names as a single string
- """
- names = sorted(
- descr for key, descr in OpCodeDebug.STREAM_CONSTANT.items()
- if key & flags)
- return ', '.join(names)
-
-# ------------------------------------------------------------------------------
-
-
-class JavaObjectUnmarshaller(JavaObjectConstants):
- """
- Deserializes a Java serialization stream
- """
- def __init__(self, stream, use_numpy_arrays=False):
- """
- Sets up members
-
- :param stream: An input stream (opened in binary/bytes mode)
- :raise IOError: Invalid input stream
- """
- self.use_numpy_arrays = use_numpy_arrays
-
- # Check stream
- if stream is None:
- raise IOError("No input stream given")
-
- # Prepare the association Terminal Symbol -> Reading method
- self.opmap = {
- self.TC_NULL: self.do_null,
- self.TC_CLASSDESC: self.do_classdesc,
- self.TC_OBJECT: self.do_object,
- self.TC_STRING: self.do_string,
- self.TC_LONGSTRING: self.do_string_long,
- self.TC_ARRAY: self.do_array,
- self.TC_CLASS: self.do_class,
- self.TC_BLOCKDATA: self.do_blockdata,
- self.TC_BLOCKDATALONG: self.do_blockdata_long,
- self.TC_REFERENCE: self.do_reference,
- self.TC_ENUM: self.do_enum,
- # note that we are reusing do_null:
- self.TC_ENDBLOCKDATA: self.do_null,
- }
-
- # Set up members
- self.current_object = None
- self.reference_counter = 0
- self.references = []
- self.object_transformers = []
- self.object_stream = stream
-
- # Read the stream header (magic & version)
- self._readStreamHeader()
-
- def readObject(self, ignore_remaining_data=False):
- """
- Reads an object from the input stream
-
- :param ignore_remaining_data: If True, don't log an error when
- unused trailing bytes are remaining
- :return: The unmarshalled object
- :raise Exception: Any exception that occurred during unmarshalling
- """
- try:
- # TODO: add expects
- _, res = self._read_and_exec_opcode(ident=0)
-
- position_bak = self.object_stream.tell()
- the_rest = self.object_stream.read()
- if not ignore_remaining_data and len(the_rest):
- log_error("Warning!!!!: Stream still has {0} bytes left. "
- "Enable debug mode of logging to see the hexdump."
- .format(len(the_rest)))
- log_debug("\n{0}".format(self._create_hexdump(the_rest)))
- else:
- log_debug("Java Object unmarshalled successfully!")
-
- self.object_stream.seek(position_bak)
- return res
- except Exception:
- self._oops_dump_state(ignore_remaining_data)
- raise
-
- def add_transformer(self, transformer):
- """
- Appends an object transformer to the deserialization process
-
- :param transformer: An object with a transform(obj) method
- """
- self.object_transformers.append(transformer)
-
- def _readStreamHeader(self):
- """
- Reads the magic header of a Java serialization stream
-
- :raise IOError: Invalid magic header (not a Java stream)
- """
- (magic, version) = self._readStruct(">HH")
- if magic != self.STREAM_MAGIC or version != self.STREAM_VERSION:
- raise IOError("The stream is not java serialized object. "
- "Invalid stream header: {0:04X}{1:04X}"
- .format(magic, version))
-
- def _read_and_exec_opcode(self, ident=0, expect=None):
- """
- Reads the next opcode, and executes its handler
-
- :param ident: Log identation level
- :param expect: A list of expected opcodes
- :return: A tuple: (opcode, result of the handler)
- :raise IOError: Read opcode is not one of the expected ones
- :raise RuntimeError: Unknown opcode
- """
- position = self.object_stream.tell()
- (opid,) = self._readStruct(">B")
- log_debug("OpCode: 0x{0:X} -- {1} (at offset 0x{2:X})"
- .format(opid, OpCodeDebug.op_id(opid), position), ident)
-
- if expect and opid not in expect:
- raise IOError(
- "Unexpected opcode 0x{0:X} -- {1} (at offset 0x{2:X})"
- .format(opid, OpCodeDebug.op_id(opid), position))
-
- try:
- handler = self.opmap[opid]
- except KeyError:
- raise RuntimeError(
- "Unknown OpCode in the stream: 0x{0:X} (at offset 0x{1:X})"
- .format(opid, position))
- else:
- return opid, handler(ident=ident)
-
- def _readStruct(self, unpack):
- """
- Reads from the input stream, using struct
-
- :param unpack: An unpack format string
- :return: The result of struct.unpack (tuple)
- :raise RuntimeError: End of stream reached during unpacking
- """
- length = struct.calcsize(unpack)
- ba = self.object_stream.read(length)
-
- if len(ba) != length:
- raise RuntimeError("Stream has been ended unexpectedly while "
- "unmarshaling.")
-
- return struct.unpack(unpack, ba)
-
- def _readString(self, length_fmt="H"):
- """
- Reads a serialized string
-
- :param length_fmt: Structure format of the string length (H or Q)
- :return: The deserialized string
- :raise RuntimeError: Unexpected end of stream
- """
- (length,) = self._readStruct(">{0}".format(length_fmt))
- ba = self.object_stream.read(length)
- return to_str(ba)
-
- def do_classdesc(self, parent=None, ident=0):
- """
- Handles a TC_CLASSDESC opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaClass object
- """
- # TC_CLASSDESC className serialVersionUID newHandle classDescInfo
- # classDescInfo:
- # classDescFlags fields classAnnotation superClassDesc
- # classDescFlags:
- # (byte) // Defined in Terminal Symbols and Constants
- # fields:
- # (short) fieldDesc[count]
-
- # fieldDesc:
- # primitiveDesc
- # objectDesc
- # primitiveDesc:
- # prim_typecode fieldName
- # objectDesc:
- # obj_typecode fieldName className1
- clazz = JavaClass()
- log_debug("[classdesc]", ident)
- class_name = self._readString()
- clazz.name = class_name
- log_debug("Class name: %s" % class_name, ident)
-
- # serialVersionUID is a Java (signed) long => 8 bytes
- serialVersionUID, classDescFlags = self._readStruct(">qB")
- clazz.serialVersionUID = serialVersionUID
- clazz.flags = classDescFlags
-
- self._add_reference(clazz, ident)
-
- log_debug("Serial: 0x{0:X} / {0:d} - classDescFlags: 0x{1:X} {2}"
- .format(serialVersionUID, classDescFlags,
- OpCodeDebug.flags(classDescFlags)), ident)
- (length,) = self._readStruct(">H")
- log_debug("Fields num: 0x{0:X}".format(length), ident)
-
- clazz.fields_names = []
- clazz.fields_types = []
- for fieldId in range(length):
- (typecode,) = self._readStruct(">B")
- field_name = self._readString()
- field_type = self._convert_char_to_type(typecode)
-
- log_debug("> Reading field {0}".format(field_name), ident)
-
- if field_type == self.TYPE_ARRAY:
- _, field_type = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_STRING, self.TC_REFERENCE))
-
- if type(field_type) is not JavaString:
- raise AssertionError("Field type must be a JavaString, "
- "not {0}".format(type(field_type)))
-
- elif field_type == self.TYPE_OBJECT:
- _, field_type = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_STRING, self.TC_REFERENCE))
-
- if type(field_type) is JavaClass:
- # FIXME: ugly trick
- field_type = JavaString(field_type.name)
-
- if type(field_type) is not JavaString:
- raise AssertionError("Field type must be a JavaString, "
- "not {0}".format(type(field_type)))
-
- log_debug("< FieldName: 0x{0:X} Name:{1} Type:{2} ID:{3}"
- .format(typecode, field_name, field_type, fieldId),
- ident)
- assert field_name is not None
- assert field_type is not None
-
- clazz.fields_names.append(field_name)
- clazz.fields_types.append(field_type)
-
- if parent:
- parent.__fields = clazz.fields_names
- parent.__types = clazz.fields_types
-
- # classAnnotation
- (opid,) = self._readStruct(">B")
- log_debug("OpCode: 0x{0:X} -- {1} (classAnnotation)"
- .format(opid, OpCodeDebug.op_id(opid)), ident)
- if opid != self.TC_ENDBLOCKDATA:
- raise NotImplementedError("classAnnotation isn't implemented yet")
-
- # superClassDesc
- log_debug("Reading Super Class of {0}".format(clazz.name), ident)
- _, superclassdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_NULL, self.TC_REFERENCE))
- log_debug("Super Class for {0}: {1}"
- .format(clazz.name, str(superclassdesc)), ident)
- clazz.superclass = superclassdesc
- return clazz
-
- def do_blockdata(self, parent=None, ident=0):
- """
- Handles TC_BLOCKDATA opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string containing the block data
- """
- # TC_BLOCKDATA (unsigned byte) (byte)[size]
- log_debug("[blockdata]", ident)
- (length,) = self._readStruct(">B")
- ba = self.object_stream.read(length)
-
- # Ensure we have an str
- return read_to_str(ba)
-
- def do_blockdata_long(self, parent=None, ident=0):
- """
- Handles TC_BLOCKDATALONG opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string containing the block data
- """
- # TC_BLOCKDATALONG (int) (byte)[size]
- log_debug("[blockdatalong]", ident)
- (length,) = self._readStruct(">I")
- ba = self.object_stream.read(length)
-
- # Ensure we have an str
- return read_to_str(ba)
-
- def do_class(self, parent=None, ident=0):
- """
- Handles TC_CLASS opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaClass object
- """
- # TC_CLASS classDesc newHandle
- log_debug("[class]", ident)
-
- # TODO: what to do with "(ClassDesc)prevObject".
- # (see 3rd line for classDesc:)
- _, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
- log_debug("Classdesc: {0}".format(classdesc), ident)
- self._add_reference(classdesc, ident)
- return classdesc
-
- def do_object(self, parent=None, ident=0):
- """
- Handles a TC_OBJECT opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaClass object
- """
- # TC_OBJECT classDesc newHandle classdata[] // data for each class
- java_object = JavaObject()
- log_debug("[object]", ident)
- log_debug("java_object.annotations just after instantiation: {0}"
- .format(java_object.annotations), ident)
-
- # TODO: what to do with "(ClassDesc)prevObject".
- # (see 3rd line for classDesc:)
- opcode, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
- # self.TC_REFERENCE hasn't shown in spec, but actually is here
-
- # Create object
- for transformer in self.object_transformers:
- java_object = transformer.create(classdesc, self)
- if java_object is not None:
- break
-
- # Store classdesc of this object
- java_object.classdesc = classdesc
-
- # Store the reference
- self._add_reference(java_object, ident)
-
- # classdata[]
-
- if classdesc.flags & self.SC_EXTERNALIZABLE \
- and not classdesc.flags & self.SC_BLOCK_DATA:
- # TODO:
- raise NotImplementedError("externalContents isn't implemented yet")
-
- if classdesc.flags & self.SC_SERIALIZABLE:
- # TODO: look at ObjectInputStream.readSerialData()
- # FIXME: Handle the SC_WRITE_METHOD flag
-
- # create megalist
- tempclass = classdesc
- megalist = []
- megatypes = []
- log_debug("Constructing class...", ident)
- while tempclass:
- log_debug("Class: {0}".format(tempclass.name), ident + 1)
- class_fields_str = ' - '.join(
- ' '.join((field_type, field_name))
- for field_type, field_name
- in zip(tempclass.fields_types, tempclass.fields_names))
- if class_fields_str:
- log_debug(class_fields_str, ident + 2)
-
- fieldscopy = tempclass.fields_names[:]
- fieldscopy.extend(megalist)
- megalist = fieldscopy
-
- fieldscopy = tempclass.fields_types[:]
- fieldscopy.extend(megatypes)
- megatypes = fieldscopy
-
- tempclass = tempclass.superclass
-
- log_debug("Values count: {0}".format(len(megalist)), ident)
- log_debug("Prepared list of values: {0}".format(megalist), ident)
- log_debug("Prepared list of types: {0}".format(megatypes), ident)
-
- for field_name, field_type in zip(megalist, megatypes):
- log_debug("Reading field: {0} - {1}"
- .format(field_type, field_name))
- res = self._read_value(field_type, ident, name=field_name)
- java_object.__setattr__(field_name, res)
-
- if classdesc.flags & self.SC_SERIALIZABLE \
- and classdesc.flags & self.SC_WRITE_METHOD \
- or classdesc.flags & self.SC_EXTERNALIZABLE \
- and classdesc.flags & self.SC_BLOCK_DATA:
- # objectAnnotation
- log_debug("java_object.annotations before: {0}"
- .format(java_object.annotations), ident)
-
- while opcode != self.TC_ENDBLOCKDATA:
- opcode, obj = self._read_and_exec_opcode(ident=ident + 1)
- # , expect=[self.TC_ENDBLOCKDATA, self.TC_BLOCKDATA,
- # self.TC_OBJECT, self.TC_NULL, self.TC_REFERENCE])
- if opcode != self.TC_ENDBLOCKDATA:
- java_object.annotations.append(obj)
-
- log_debug("objectAnnotation value: {0}".format(obj), ident)
-
- log_debug("java_object.annotations after: {0}"
- .format(java_object.annotations), ident)
-
- # Allow extra loading operations
- if hasattr(java_object, "__extra_loading__"):
- log_debug("Java object has extra loading capability.")
- java_object.__extra_loading__(self, ident)
-
- log_debug(">>> java_object: {0}".format(java_object), ident)
- return java_object
-
- def do_string(self, parent=None, ident=0):
- """
- Handles a TC_STRING opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string
- """
- log_debug("[string]", ident)
- ba = JavaString(self._readString())
- self._add_reference(ba, ident)
- return ba
-
- def do_string_long(self, parent=None, ident=0):
- """
- Handles a TC_LONGSTRING opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string
- """
- log_debug("[long string]", ident)
- ba = JavaString(self._readString("Q"))
- self._add_reference(ba, ident)
- return ba
-
- def do_array(self, parent=None, ident=0):
- """
- Handles a TC_ARRAY opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A list of deserialized objects
- """
- # TC_ARRAY classDesc newHandle (int) values[size]
- log_debug("[array]", ident)
- _, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
-
- array = JavaArray(classdesc)
-
- self._add_reference(array, ident)
-
- (size,) = self._readStruct(">i")
- log_debug("size: {0}".format(size), ident)
-
- type_char = classdesc.name[0]
- assert type_char == self.TYPE_ARRAY
- type_char = classdesc.name[1]
-
- if type_char == self.TYPE_OBJECT or type_char == self.TYPE_ARRAY:
- for _ in range(size):
- _, res = self._read_and_exec_opcode(ident=ident + 1)
- log_debug("Object value: {0}".format(res), ident)
- array.append(res)
- elif type_char == self.TYPE_BYTE:
- array = JavaByteArray(self.object_stream.read(size), classdesc)
- elif self.use_numpy_arrays:
- import numpy
- array = numpy.fromfile(
- self.object_stream,
- dtype=JavaObjectConstants.NUMPY_TYPE_MAP[type_char],
- count=size)
- else:
- for _ in range(size):
- res = self._read_value(type_char, ident)
- log_debug("Native value: {0}".format(res), ident)
- array.append(res)
-
- return array
-
- def do_reference(self, parent=None, ident=0):
- """
- Handles a TC_REFERENCE opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: The referenced object
- """
- (handle,) = self._readStruct(">L")
- log_debug("## Reference handle: 0x{0:X}".format(handle), ident)
- ref = self.references[handle - self.BASE_REFERENCE_IDX]
- log_debug("###-> Type: {0} - Value: {1}".format(type(ref), ref), ident)
- return ref
-
- @staticmethod
- def do_null(parent=None, ident=0):
- """
- Handles a TC_NULL opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: Always None
- """
- return None
-
- def do_enum(self, parent=None, ident=0):
- """
- Handles a TC_ENUM opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaEnum object
- """
- # TC_ENUM classDesc newHandle enumConstantName
- enum = JavaEnum()
- _, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
- enum.classdesc = classdesc
- self._add_reference(enum, ident)
- _, enumConstantName = self._read_and_exec_opcode(
- ident=ident + 1, expect=(self.TC_STRING, self.TC_REFERENCE))
- enum.constant = enumConstantName
- return enum
-
- @staticmethod
- def _create_hexdump(src, start_offset=0, length=16):
- """
- Prepares an hexadecimal dump string
-
- :param src: A string containing binary data
- :param start_offset: The start offset of the source
- :param length: Length of a dump line
- :return: A dump string
- """
- FILTER = ''.join((len(repr(chr(x))) == 3) and chr(x) or '.'
- for x in range(256))
- pattern = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3)
-
- # Convert raw data to str (Python 3 compatibility)
- src = to_str(src, 'latin-1')
-
- result = []
- for i in range(0, len(src), length):
- s = src[i:i + length]
- hexa = ' '.join("{0:02X}".format(ord(x)) for x in s)
- printable = s.translate(FILTER)
- result.append(pattern.format(i + start_offset, hexa, printable))
-
- return ''.join(result)
-
- def _read_value(self, field_type, ident, name=""):
- """
- Reads the next value, of the given type
-
- :param field_type: A serialization typecode
- :param ident: Log indentation
- :param name: Field name (for logs)
- :return: The read value
- :raise RuntimeError: Unknown field type
- """
- if len(field_type) > 1:
- # We don't need details for arrays and objects
- field_type = field_type[0]
-
- if field_type == self.TYPE_BOOLEAN:
- (val,) = self._readStruct(">B")
- res = bool(val)
- elif field_type == self.TYPE_BYTE:
- (res,) = self._readStruct(">b")
- elif field_type == self.TYPE_CHAR:
- # TYPE_CHAR is defined by the serialization specification
- # but not used in the implementation, so this is
- # a hypothetical code
- res = bytes(self._readStruct(">bb")).decode("utf-16-be")
- elif field_type == self.TYPE_SHORT:
- (res,) = self._readStruct(">h")
- elif field_type == self.TYPE_INTEGER:
- (res,) = self._readStruct(">i")
- elif field_type == self.TYPE_LONG:
- (res,) = self._readStruct(">q")
- elif field_type == self.TYPE_FLOAT:
- (res,) = self._readStruct(">f")
- elif field_type == self.TYPE_DOUBLE:
- (res,) = self._readStruct(">d")
- elif field_type == self.TYPE_OBJECT or field_type == self.TYPE_ARRAY:
- _, res = self._read_and_exec_opcode(ident=ident + 1)
- else:
- raise RuntimeError("Unknown typecode: {0}".format(field_type))
-
- log_debug("* {0} {1}: {2}".format(field_type, name, res), ident)
- return res
-
- def _convert_char_to_type(self, type_char):
- """
- Ensures a read character is a typecode.
-
- :param type_char: Read typecode
- :return: The typecode as a string (using chr)
- :raise RuntimeError: Unknown typecode
- """
- typecode = type_char
- if type(type_char) is int:
- typecode = chr(type_char)
-
- if typecode in self.TYPECODES_LIST:
- return typecode
- else:
- raise RuntimeError("Typecode {0} ({1}) isn't supported."
- .format(type_char, typecode))
-
- def _add_reference(self, obj, ident=0):
- """
- Adds a read reference to the marshaler storage
-
- :param obj: Reference to add
- :param ident: Log indentation level
- """
- log_debug("## New reference handle 0x{0:X}: {1} -> {2}"
- .format(len(self.references) + self.BASE_REFERENCE_IDX,
- type(obj).__name__, obj), ident)
- self.references.append(obj)
-
- def _oops_dump_state(self, ignore_remaining_data=False):
- """
- Log a deserialization error
-
- :param ignore_remaining_data: If True, don't log an error when
- unused trailing bytes are remaining
- """
- log_error("==Oops state dump" + "=" * (30 - 17))
- log_error("References: {0}".format(self.references))
- log_error("Stream seeking back at -16 byte (2nd line is an actual "
- "position!):")
-
- # Do not use a keyword argument
- self.object_stream.seek(-16, os.SEEK_CUR)
- position = self.object_stream.tell()
- the_rest = self.object_stream.read()
-
- if not ignore_remaining_data and len(the_rest):
- log_error(
- "Warning!!!!: Stream still has {0} bytes left:\n{1}".format(
- len(the_rest), self._create_hexdump(the_rest, position)))
-
- log_error("=" * 30)
-
-# ------------------------------------------------------------------------------
-
-
-class JavaObjectMarshaller(JavaObjectConstants):
- """
- Serializes objects into Java serialization format
- """
- def __init__(self, stream=None):
- """
- Sets up members
-
- :param stream: An output stream
- """
- self.object_stream = stream
- self.object_obj = None
- self.object_transformers = []
- self.references = []
-
- def add_transformer(self, transformer):
- """
- Appends an object transformer to the serialization process
-
- :param transformer: An object with a transform(obj) method
- """
- self.object_transformers.append(transformer)
-
- def dump(self, obj):
- """
- Dumps the given object in the Java serialization format
- """
- self.references = []
- self.object_obj = obj
- self.object_stream = BytesIO()
- self._writeStreamHeader()
- self.writeObject(obj)
- return self.object_stream.getvalue()
-
- def _writeStreamHeader(self):
- """
- Writes the Java serialization magic header in the serialization stream
- """
- self._writeStruct(">HH", 4, (self.STREAM_MAGIC, self.STREAM_VERSION))
-
- def writeObject(self, obj):
- """
- Appends an object to the serialization stream
-
- :param obj: A string or a deserialized Java object
- :raise RuntimeError: Unsupported type
- """
- log_debug("Writing object of type {0}".format(type(obj).__name__))
- if isinstance(obj, JavaArray):
- # Deserialized Java array
- self.write_array(obj)
- elif isinstance(obj, JavaEnum):
- # Deserialized Java Enum
- self.write_enum(obj)
- elif isinstance(obj, JavaObject):
- # Deserialized Java object
- self.write_object(obj)
- elif isinstance(obj, JavaString):
- # Deserialized String
- self.write_string(obj)
- elif isinstance(obj, JavaClass):
- # Java class
- self.write_class(obj)
- elif obj is None:
- # Null
- self.write_null()
- elif type(obj) is str:
- # String value
- self.write_blockdata(obj)
- else:
- # Unhandled type
- raise RuntimeError("Object serialization of type {0} is not "
- "supported.".format(type(obj)))
-
- def _writeStruct(self, unpack, length, args):
- """
- Appends data to the serialization stream
-
- :param unpack: Struct format string
- :param length: Unused
- :param args: Struct arguments
- """
- ba = struct.pack(unpack, *args)
- self.object_stream.write(ba)
-
- def _writeString(self, obj, use_reference=True):
- """
- Appends a string to the serialization stream
-
- :param obj: String to serialize
- :param use_reference: If True, allow writing a reference
- """
- # TODO: Convert to "modified UTF-8"
- # http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8
- string = to_bytes(obj, "utf-8")
-
- if use_reference and isinstance(obj, JavaString):
- try:
- idx = self.references.index(obj)
- except ValueError:
- # First appearance of the string
- self.references.append(obj)
- logging.debug(
- "*** Adding ref 0x%X for string: %s",
- len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj)
-
- self._writeStruct(">H", 2, (len(string),))
- self.object_stream.write(string)
- else:
- # Write a reference to the previous type
- logging.debug("*** Reusing ref 0x%X for string: %s",
- idx + self.BASE_REFERENCE_IDX, obj)
- self.write_reference(idx)
- else:
- self._writeStruct(">H", 2, (len(string),))
- self.object_stream.write(string)
-
- def write_string(self, obj, use_reference=True):
- """
- Writes a Java string with the TC_STRING type marker
-
- :param obj: The string to print
- :param use_reference: If True, allow writing a reference
- """
- if use_reference and isinstance(obj, JavaString):
- try:
- idx = self.references.index(obj)
- except ValueError:
- # String is not referenced: let _writeString store it
- self._writeStruct(">B", 1, (self.TC_STRING,))
- self._writeString(obj, use_reference)
- else:
- # Reuse the referenced string
- logging.debug("*** Reusing ref 0x%X for String: %s",
- idx + self.BASE_REFERENCE_IDX, obj)
- self.write_reference(idx)
- else:
- # Don't use references
- self._writeStruct(">B", 1, (self.TC_STRING,))
- self._writeString(obj, use_reference)
-
- def write_enum(self, obj):
- """
- Writes an Enum value
-
- :param obj: A JavaEnum object
- """
- # FIXME: the output doesn't have the same references as the real
- # serializable form
- self._writeStruct(">B", 1, (self.TC_ENUM,))
-
- try:
- idx = self.references.index(obj)
- except ValueError:
- # New reference
- self.references.append(obj)
- logging.debug(
- "*** Adding ref 0x%X for enum: %s",
- len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj)
-
- self.write_classdesc(obj.get_class())
- else:
- self.write_reference(idx)
-
- self.write_string(obj.constant)
-
- def write_blockdata(self, obj, parent=None):
- """
- Appends a block of data to the serialization stream
-
- :param obj: String form of the data block
- """
- if type(obj) is str:
- # Latin-1: keep bytes as is
- obj = to_bytes(obj, "latin-1")
-
- length = len(obj)
- if length <= 256:
- # Small block data
- # TC_BLOCKDATA (unsigned byte) (byte)[size]
- self._writeStruct(">B", 1, (self.TC_BLOCKDATA,))
- self._writeStruct(">B", 1, (length,))
- else:
- # Large block data
- # TC_BLOCKDATALONG (unsigned int) (byte)[size]
- self._writeStruct(">B", 1, (self.TC_BLOCKDATALONG,))
- self._writeStruct(">I", 1, (length,))
-
- self.object_stream.write(obj)
-
- def write_null(self):
- """
- Writes a "null" value
- """
- self._writeStruct(">B", 1, (self.TC_NULL,))
-
- def write_object(self, obj, parent=None):
- """
- Writes an object header to the serialization stream
-
- :param obj: Not yet used
- :param parent: Not yet used
- """
- # Transform object
- for transformer in self.object_transformers:
- tmp_object = transformer.transform(obj)
- if tmp_object is not obj:
- obj = tmp_object
- break
-
- self._writeStruct(">B", 1, (self.TC_OBJECT,))
- cls = obj.get_class()
- self.write_classdesc(cls)
-
- # Add reference
- self.references.append([])
- logging.debug(
- "*** Adding ref 0x%X for object %s",
- len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj)
-
- all_names = collections.deque()
- all_types = collections.deque()
- tmpcls = cls
- while tmpcls:
- all_names.extendleft(reversed(tmpcls.fields_names))
- all_types.extendleft(reversed(tmpcls.fields_types))
- tmpcls = tmpcls.superclass
- del tmpcls
-
- logging.debug("<=> Field names: %s", all_names)
- logging.debug("<=> Field types: %s", all_types)
-
- for field_name, field_type in zip(all_names, all_types):
- try:
- logging.debug("Writing field %s (%s): %s",
- field_name, field_type, getattr(obj, field_name))
- self._write_value(field_type, getattr(obj, field_name))
- except AttributeError as ex:
- log_error("No attribute {0} for object {1}\nDir: {2}"
- .format(ex, repr(obj), dir(obj)))
- raise
- del all_names, all_types
-
- if cls.flags & self.SC_SERIALIZABLE \
- and cls.flags & self.SC_WRITE_METHOD \
- or cls.flags & self.SC_EXTERNALIZABLE \
- and cls.flags & self.SC_BLOCK_DATA:
- for annotation in obj.annotations:
- log_debug("Write annotation {0} for {1}"
- .format(repr(annotation), repr(obj)))
- if annotation is None:
- self.write_null()
- else:
- self.writeObject(annotation)
- self._writeStruct('>B', 1, (self.TC_ENDBLOCKDATA,))
-
- def write_class(self, obj, parent=None):
- """
- Writes a class to the stream
-
- :param obj: A JavaClass object
- :param parent:
- """
- self._writeStruct(">B", 1, (self.TC_CLASS,))
- self.write_classdesc(obj)
-
- def write_classdesc(self, obj, parent=None):
- """
- Writes a class description
-
- :param obj: Class description to write
- :param parent:
- """
- if obj not in self.references:
- # Add reference
- self.references.append(obj)
- logging.debug(
- "*** Adding ref 0x%X for classdesc %s",
- len(self.references) - 1 + self.BASE_REFERENCE_IDX, obj.name)
-
- self._writeStruct(">B", 1, (self.TC_CLASSDESC,))
- self._writeString(obj.name)
- self._writeStruct(">qB", 1, (obj.serialVersionUID, obj.flags))
- self._writeStruct(">H", 1, (len(obj.fields_names),))
-
- for field_name, field_type \
- in zip(obj.fields_names, obj.fields_types):
- self._writeStruct(
- ">B", 1, (self._convert_type_to_char(field_type),))
- self._writeString(field_name)
- if field_type[0] in (self.TYPE_OBJECT, self.TYPE_ARRAY):
- try:
- idx = self.references.index(field_type)
- except ValueError:
- # First appearance of the type
- self.references.append(field_type)
- logging.debug(
- "*** Adding ref 0x%X for field type %s",
- len(self.references) - 1 + self.BASE_REFERENCE_IDX,
- field_type)
-
- self.write_string(field_type, False)
- else:
- # Write a reference to the previous type
- logging.debug("*** Reusing ref 0x%X for %s (%s)",
- idx + self.BASE_REFERENCE_IDX,
- field_type, field_name)
- self.write_reference(idx)
-
- self._writeStruct(">B", 1, (self.TC_ENDBLOCKDATA,))
- if obj.superclass:
- self.write_classdesc(obj.superclass)
- else:
- self.write_null()
- else:
- # Use reference
- self.write_reference(self.references.index(obj))
-
- def write_reference(self, ref_index):
- """
- Writes a reference
- :param ref_index: Local index (0-based) to the reference
- """
- self._writeStruct(
- ">BL", 1, (self.TC_REFERENCE, ref_index + self.BASE_REFERENCE_IDX))
-
- def write_array(self, obj):
- """
- Writes a JavaArray
-
- :param obj: A JavaArray object
- """
- classdesc = obj.get_class()
- self._writeStruct(">B", 1, (self.TC_ARRAY,))
- self.write_classdesc(classdesc)
- self._writeStruct(">i", 1, (len(obj),))
-
- # Add reference
- self.references.append(obj)
- logging.debug(
- "*** Adding ref 0x%X for array []",
- len(self.references) - 1 + self.BASE_REFERENCE_IDX)
-
- type_char = classdesc.name[0]
- assert type_char == self.TYPE_ARRAY
- type_char = classdesc.name[1]
-
- if type_char == self.TYPE_OBJECT:
- for o in obj:
- self._write_value(classdesc.name[1:], o)
- elif type_char == self.TYPE_ARRAY:
- for a in obj:
- self.write_array(a)
- else:
- log_debug("Write array of type %s" % type_char)
- for v in obj:
- log_debug("Writing: %s" % v)
- self._write_value(type_char, v)
-
- def _write_value(self, field_type, value):
- """
- Writes an item of an array
-
- :param field_type: Value type
- :param value: The value itself
- """
- if len(field_type) > 1:
- # We don't need details for arrays and objects
- field_type = field_type[0]
-
- if field_type == self.TYPE_BOOLEAN:
- self._writeStruct(">B", 1, (1 if value else 0,))
- elif field_type == self.TYPE_BYTE:
- self._writeStruct(">b", 1, (value,))
- elif field_type == self.TYPE_SHORT:
- self._writeStruct(">h", 1, (value,))
- elif field_type == self.TYPE_INTEGER:
- self._writeStruct(">i", 1, (value,))
- elif field_type == self.TYPE_LONG:
- self._writeStruct(">q", 1, (value,))
- elif field_type == self.TYPE_FLOAT:
- self._writeStruct(">f", 1, (value,))
- elif field_type == self.TYPE_DOUBLE:
- self._writeStruct(">d", 1, (value,))
- elif field_type == self.TYPE_OBJECT or field_type == self.TYPE_ARRAY:
- if value is None:
- self.write_null()
- elif isinstance(value, JavaEnum):
- self.write_enum(value)
- elif isinstance(value, (JavaArray, JavaByteArray)):
- self.write_array(value)
- elif isinstance(value, JavaObject):
- self.write_object(value)
- elif isinstance(value, JavaString):
- self.write_string(value)
- elif isinstance(value, str):
- self.write_blockdata(value)
- else:
- raise RuntimeError("Unknown typecode: {0}".format(field_type))
- else:
- raise RuntimeError("Unknown typecode: {0}".format(field_type))
-
- def _convert_type_to_char(self, type_char):
- """
- Converts the given type code to an int
-
- :param type_char: A type code character
- """
- typecode = type_char
- if type(type_char) is int:
- typecode = chr(type_char)
-
- if typecode in self.TYPECODES_LIST:
- return ord(typecode)
- elif len(typecode) > 1:
- if typecode[0] == 'L':
- return ord(self.TYPE_OBJECT)
- elif typecode[0] == '[':
- return ord(self.TYPE_ARRAY)
-
- raise RuntimeError("Typecode {0} ({1}) isn't supported."
- .format(type_char, typecode))
-
-# ------------------------------------------------------------------------------
-
-
-class DefaultObjectTransformer(object):
- """
- Default transformer for the deserialized objects.
- Converts JavaObject objects to Python types (maps, lists, ...)
- """
- class JavaList(list, JavaObject):
- """
- Python-Java list bridge type
- """
- def __init__(self, unmarshaller):
- # type: (JavaObjectUnmarshaller) -> None
- list.__init__(self)
- JavaObject.__init__(self)
-
- def __extra_loading__(self, unmarshaller, ident=0):
- # type: (JavaObjectUnmarshaller, int) -> None
- """
- Loads the content of the map, written with a custom implementation
- """
- # Lists have their content in there annotations
- self.extend(self.annotations[1:])
-
- class JavaMap(dict, JavaObject):
- """
- Python-Java dictionary/map bridge type
- """
- def __init__(self, unmarshaller):
- # type: (JavaObjectUnmarshaller) -> None
- dict.__init__(self)
- JavaObject.__init__(self)
-
- def __extra_loading__(self, unmarshaller, ident=0):
- # type: (JavaObjectUnmarshaller, int) -> None
- """
- Loads the content of the map, written with a custom implementation
- """
- # Group annotation elements 2 by 2
- args = [iter(self.annotations[1:])] * 2
- for key, value in zip(*args):
- self[key] = value
-
- class JavaLinkedHashMap(JavaMap):
- def __extra_loading__(self, unmarshaller, ident=0):
- # type: (JavaObjectUnmarshaller, int) -> None
- """
- Loads the content of the map, written with a custom implementation
- """
- # Ignore the blockdata opid
- (opid,) = unmarshaller._readStruct(">B")
- if opid != unmarshaller.SC_BLOCK_DATA:
- raise ValueError("Start of block data not found")
-
- # Read HashMap fields
- self.buckets = unmarshaller._read_value(
- unmarshaller.TYPE_INTEGER, ident)
- self.size = unmarshaller._read_value(
- unmarshaller.TYPE_INTEGER, ident)
-
- # Read entries
- for _ in range(self.size):
- key = unmarshaller._read_and_exec_opcode()[1]
- value = unmarshaller._read_and_exec_opcode()[1]
- self[key] = value
-
- # Ignore the end of the blockdata
- unmarshaller._read_and_exec_opcode(
- ident, [unmarshaller.TC_ENDBLOCKDATA])
-
- # Ignore the trailing 0
- (opid,) = unmarshaller._readStruct(">B")
- if opid != 0:
- raise ValueError("Should find 0x0, got {0:x}".format(opid))
-
- TYPE_MAPPER = {
- "java.util.ArrayList": JavaList,
- "java.util.LinkedList": JavaList,
- "java.util.HashMap": JavaMap,
- "java.util.LinkedHashMap": JavaLinkedHashMap,
- "java.util.TreeMap": JavaMap,
- }
-
- def create(self, classdesc, unmarshaller=None):
- # type: (JavaClass, JavaObjectUnmarshaller) -> JavaObject
- """
- Transforms a deserialized Java object into a Python object
-
- :param classdesc: The description of a Java class
- :return: The Python form of the object, or the original JavaObject
- """
- try:
- mapped_type = self.TYPE_MAPPER[classdesc.name]
- except KeyError:
- # Return a JavaObject by default
- return JavaObject()
- else:
- log_debug("---")
- log_debug(classdesc.name)
- log_debug("---")
-
- java_object = mapped_type(unmarshaller)
-
- log_debug(">>> java_object: {0}".format(java_object))
- return java_object
diff --git a/javaobj/__init__.py b/javaobj/__init__.py
new file mode 100644
index 0000000..d1b146d
--- /dev/null
+++ b/javaobj/__init__.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for reading and writing (writing is WIP currently) Java
+objects in the serialized form produced by ObjectOutputStream. This form of
+object representation is a standard data interchange format in the Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+# Imports giving access to what the javaobj module provides
+from javaobj.v1.beans import * # noqa: F401,F403
+from javaobj.v1.core import * # noqa: F401,F403
+from javaobj.v1.transformers import * # noqa: F401,F403
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
diff --git a/javaobj/constants.py b/javaobj/constants.py
new file mode 100644
index 0000000..d4dd1cb
--- /dev/null
+++ b/javaobj/constants.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""
+Definition of the constants used in the deserialization process
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import enum
+
+# ------------------------------------------------------------------------------
+
+__all__ = (
+ "PRIMITIVE_TYPES",
+ "StreamConstants",
+ "TerminalCode",
+ "ClassDescFlags",
+ "TypeCode",
+ "StreamCodeDebug",
+)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class StreamConstants(enum.IntEnum):
+ """
+ Basic constants of the stream protocol
+ """
+
+ # Magic bytes of any serialized files
+ STREAM_MAGIC = 0xACED
+
+ # Only protocol version supported by javaobj
+ STREAM_VERSION = 0x05
+
+ # Base index for handles
+ BASE_REFERENCE_IDX = 0x7E0000
+
+
+class TerminalCode(enum.IntEnum):
+ """
+ Stream type Codes
+ """
+
+ TC_NULL = 0x70
+ TC_REFERENCE = 0x71
+ TC_CLASSDESC = 0x72
+ TC_OBJECT = 0x73
+ TC_STRING = 0x74
+ TC_ARRAY = 0x75
+ TC_CLASS = 0x76
+ TC_BLOCKDATA = 0x77
+ TC_ENDBLOCKDATA = 0x78
+ TC_RESET = 0x79
+ TC_BLOCKDATALONG = 0x7A
+ TC_EXCEPTION = 0x7B
+ TC_LONGSTRING = 0x7C
+ TC_PROXYCLASSDESC = 0x7D
+ TC_ENUM = 0x7E
+ # Ignore TC_MAX: we don't use it and it messes with TC_ENUM
+ # TC_MAX = 0x7E
+
+
+class ClassDescFlags(enum.IntEnum):
+ """
+ Class description flags
+ """
+
+ SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE
+ SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE
+ SC_SERIALIZABLE = 0x02
+ SC_EXTERNALIZABLE = 0x04
+ SC_ENUM = 0x10
+
+
+class TypeCode(enum.IntEnum):
+ """
+ Type definition chars (typecode)
+ """
+
+ # Primitive types
+ TYPE_BYTE = ord("B") # 0x42
+ TYPE_CHAR = ord("C") # 0x43
+ TYPE_DOUBLE = ord("D") # 0x44
+ TYPE_FLOAT = ord("F") # 0x46
+ TYPE_INTEGER = ord("I") # 0x49
+ TYPE_LONG = ord("J") # 0x4A
+ TYPE_SHORT = ord("S") # 0x53
+ TYPE_BOOLEAN = ord("Z") # 0x5A
+ # Object types
+ TYPE_OBJECT = ord("L") # 0x4C
+ TYPE_ARRAY = ord("[") # 0x5B
+
+
+# List of the types defined as primitive
+PRIMITIVE_TYPES = (
+ TypeCode.TYPE_BYTE,
+ TypeCode.TYPE_CHAR,
+ TypeCode.TYPE_DOUBLE,
+ TypeCode.TYPE_FLOAT,
+ TypeCode.TYPE_INTEGER,
+ TypeCode.TYPE_LONG,
+ TypeCode.TYPE_SHORT,
+ TypeCode.TYPE_BOOLEAN,
+)
+
+
+class StreamCodeDebug:
+    """
+    Debugging helpers converting stream codes to human-readable names
+    """
+
+    @staticmethod
+    def op_id(op_id):
+        # type: (int) -> str
+        """
+        Returns the name of the given OP Code
+        :param op_id: OP Code
+        :return: Name of the OP Code, or a placeholder showing the raw value
+        """
+        try:
+            return TerminalCode(op_id).name
+        except ValueError:
+            return "<unknown TC:{0}>".format(op_id)
+
+    @staticmethod
+    def type_code(type_id):
+        # type: (int) -> str
+        """
+        Returns the name of the given Type Code
+        :param type_id: Type code
+        :return: Name of the type code, or a placeholder showing the raw value
+        """
+        try:
+            return TypeCode(type_id).name
+        except ValueError:
+            return "<unknown TypeCode:{0}>".format(type_id)
+
+    @staticmethod
+    def flags(flags):
+        # type: (int) -> str
+        """
+        Returns the names of the class description flags found in the given
+        integer
+
+        :param flags: A class description flag entry
+        :return: The sorted flags names, joined with ", " in a single string
+        """
+        names = sorted(key.name for key in ClassDescFlags if key & flags)
+        return ", ".join(names)
diff --git a/javaobj/modifiedutf8.py b/javaobj/modifiedutf8.py
new file mode 100644
index 0000000..ac29ce5
--- /dev/null
+++ b/javaobj/modifiedutf8.py
@@ -0,0 +1,255 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Implements the support of the Java-specific kind of UTF-8 encoding.
+
+This module is a modified version of ``py2jdbc.mutf8`` provided by
+`@guywithface <https://github.com/guywithface>`_.
+
+The project the original file comes from is available at:
+https://github.com/swstephe/py2jdbc/
+
+:authors: Scott Stephens (@swstephe), @guywithface
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+"""
+
+from __future__ import unicode_literals
+
+import sys
+
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# Encoding name: not cesu-8, which uses a different zero-byte
+NAME = "mutf8"
+
+# ------------------------------------------------------------------------------
+
+if sys.version_info[0] >= 3:
+ unicode_char = chr # pylint:disable=C0103
+
+ def byte_to_int(data):
+ # type: (bytes) -> int
+ """
+ Converts the first byte of the given data to an integer
+ """
+ if isinstance(data, int):
+ return data
+
+ if isinstance(data, bytes):
+ return data[0]
+
+ raise ValueError(
+ "Expected byte or int as input, got: {0}".format(
+ type(data).__name__
+ )
+ )
+
+
+else:
+ unicode_char = (
+ unichr # pylint:disable=C0103,undefined-variable # noqa: F821
+ )
+
+ def byte_to_int(data):
+ # type: (bytes) -> int
+ """
+ Converts the first byte of the given data to an integer
+ """
+ if isinstance(data, int):
+ return data
+
+ if isinstance(data, str):
+ return ord(data[0])
+
+ raise ValueError(
+ "Expected byte or int as input, got: {0}".format(
+ type(data).__name__
+ )
+ )
+
+
+# ------------------------------------------------------------------------------
+
+
+class DecodeMap(object): # pylint:disable=R0205
+ """
+ A utility class which manages masking, comparing and mapping in bits.
+ If the mask and compare fails, this will raise UnicodeDecodeError so
+ encode and decode will correctly handle bad characters.
+ """
+
+ def __init__(self, count, mask, value, bits):
+ """
+ Initialize a DecodeMap, entry from a static dictionary for the module.
+ It automatically calculates the mask for the bits for the value
+ (always assumed to be at the bottom of the byte).
+
+ :param count: The number of bytes in this entire sequence.
+ :param mask: The mask to apply to the byte at this position.
+ :param value: The value of masked bits, (without shifting).
+ :param bits: The number of bits.
+ """
+ self.count = count
+ self.mask = mask
+ self.value = value
+ self.bits = bits
+ self.mask2 = (1 << bits) - 1
+
+ def apply(self, byte, value, data, i, count):
+ """
+ Apply mask, compare to expected value, shift and return result.
+ Eventually, this could become a ``reduce`` function.
+
+ :param byte: The byte to compare
+ :param value: The currently accumulated value.
+ :param data: The data buffer, (array of bytes).
+ :param i: The position within the data buffer.
+ :param count: The position of this comparison.
+ :return: A new value with the bits merged in.
+ :raises UnicodeDecodeError: if marked bits don't match.
+ """
+ if byte & self.mask == self.value:
+ value <<= self.bits
+ value |= byte & self.mask2
+ else:
+ raise UnicodeDecodeError(
+ NAME,
+ data,
+ i,
+ i + count,
+ "invalid {}-byte sequence".format(self.count),
+ )
+ return value
+
+ def __repr__(self):
+ return "DecodeMap({})".format(
+ ", ".join(
+ "{}=0x{:02x}".format(n, getattr(self, n))
+ for n in ("count", "mask", "value", "bits", "mask2")
+ )
+ )
+
+
+DECODER_MAP = {
+ 2: ((0xC0, 0x80, 6),),
+ 3: ((0xC0, 0x80, 6), (0xC0, 0x80, 6)),
+ 6: (
+ (0xF0, 0xA0, 4),
+ (0xC0, 0x80, 6),
+ (0xFF, 0xED, 0),
+ (0xF0, 0xB0, 4),
+ (0xC0, 0x80, 6),
+ ),
+}
+
+DECODE_MAP = dict(
+ (k, tuple(DecodeMap(k, *vv) for vv in v)) for k, v in DECODER_MAP.items()
+)
+
+
+def decoder(data):
+    """
+    This generator processes a sequence of bytes in Modified UTF-8 encoding
+    and produces a sequence of unicode string characters.
+
+    It takes bits from the byte until it matches one of the known encoding
+    sequences (a surrogate pair yields a single supplementary character).
+    It uses ``DecodeMap`` to mask, compare and generate values.
+
+    :param data: a string of bytes in Modified UTF-8 encoding.
+    :return: a generator producing a string of unicode characters
+    :raises UnicodeDecodeError: unrecognised byte in sequence encountered.
+    """
+
+    def next_byte(_it, start, count):
+        try:
+            return next(_it)[1]
+        except StopIteration:
+            raise UnicodeDecodeError(
+                NAME, data, start, start + count, "incomplete byte sequence"
+            )
+
+    it = iter(enumerate(data))
+    for i, d in it:
+        if d == 0x00:  # 00000000
+            raise UnicodeDecodeError(
+                NAME, data, i, i + 1, "embedded zero-byte not allowed"
+            )
+
+        if d & 0x80:  # 1xxxxxxx
+            if d & 0x40:  # 11xxxxxx
+                if d & 0x20:  # 111xxxxx
+                    if d & 0x10:  # 1111xxxx
+                        raise UnicodeDecodeError(
+                            NAME, data, i, i + 1, "invalid encoding character"
+                        )
+                    if d == 0xED:  # encoded surrogate pair (2 x 3 bytes)
+                        value = 0
+                        for i1, dm in enumerate(DECODE_MAP[6]):
+                            d1 = next_byte(it, i, i1 + 1)
+                            value = dm.apply(d1, value, data, i, i1 + 1)
+                        value += 0x10000  # 20 bits read = offset from U+10000
+                    else:  # 1110xxxx
+                        value = d & 0x0F
+                        for i1, dm in enumerate(DECODE_MAP[3]):
+                            d1 = next_byte(it, i, i1 + 1)
+                            value = dm.apply(d1, value, data, i, i1 + 1)
+                else:  # 110xxxxx
+                    value = d & 0x1F
+                    for i1, dm in enumerate(DECODE_MAP[2]):
+                        d1 = next_byte(it, i, i1 + 1)
+                        value = dm.apply(d1, value, data, i, i1 + 1)
+            else:  # 10xxxxxx
+                raise UnicodeDecodeError(
+                    NAME, data, i, i + 1, "misplaced continuation character"
+                )
+        else:  # 0xxxxxxx
+            value = d
+        # noinspection PyCompatibility
+        yield mutf8_unichr(value)
+
+
+def decode_modified_utf8(data, errors="strict"):
+    """
+    Decodes a sequence of bytes in Modified UTF-8 to a unicode text.
+    NOTE(review): decoding stops at the first error, even if it is ignored.
+    This function is designed to be used with Python ``codecs`` module.
+
+    :param data: a string of bytes in Modified UTF-8
+    :param errors: "strict" (raise), "ignore" (skip) or "replace" (U+FFFD)
+    :return: a tuple (unicode text, number of decoded characters)
+    :raises UnicodeDecodeError: sequence is invalid and errors is "strict"
+    """
+    value, length = "", 0
+    # Materialize the input: UnicodeDecodeError only accepts a bytes-like
+    # object, and raises TypeError when the decoder hands it a generator
+    it = iter(decoder(bytearray(byte_to_int(d) for d in data)))
+    while True:
+        try:
+            value += next(it)
+            length += 1
+        except StopIteration:
+            break
+        except UnicodeDecodeError:
+            if errors == "strict":
+                raise
+            # Any other mode swallows the error; only "replace" marks it
+            if errors == "replace":
+                value += "\uFFFD"
+                length += 1
+    return value, length
+
+
+def mutf8_unichr(value):
+ """
+ Mimics Python 2 unichr() and Python 3 chr()
+ """
+ return unicode_char(value)
diff --git a/javaobj/utils.py b/javaobj/utils.py
new file mode 100644
index 0000000..2d6f761
--- /dev/null
+++ b/javaobj/utils.py
@@ -0,0 +1,276 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides utility methods used by the core implementation of javaobj.
+
+Namely: logging methods, bytes/str/unicode converters
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+from typing import IO, Tuple # noqa: F401
+import gzip
+import logging
+import os
+import struct
+import sys
+
+# Modified UTF-8 parser
+from .modifiedutf8 import byte_to_int, decode_modified_utf8
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+# Setup the logger
+_log = logging.getLogger("javaobj")
+
+
+def log_debug(message, ident=0):
+ """
+ Logs a message at debug level
+
+ :param message: Message to log
+ :param ident: Number of indentation spaces
+ """
+ _log.debug("%s%s", " " * (ident * 2), message)
+
+
+def log_error(message, ident=0):
+ """
+ Logs a message at error level
+
+ :param message: Message to log
+ :param ident: Number of indentation spaces
+ """
+ _log.error("%s%s", " " * (ident * 2), message)
+
+
+# ------------------------------------------------------------------------------
+
+
+def read_struct(data, fmt_str):
+ # type: (bytes, str) -> Tuple
+ """
+ Reads input bytes and extract the given structure. Returns both the read
+ elements and the remaining data
+
+ :param data: Data as bytes
+ :param fmt_str: Struct unpack format string
+ :return: A tuple (results as tuple, remaining data)
+ """
+ size = struct.calcsize(fmt_str)
+ return struct.unpack(fmt_str, data[:size]), data[size:]
+
+
+def read_string(data, length_fmt="H"):
+ # type: (bytes, str) -> Tuple[UNICODE_TYPE, bytes]
+ """
+ Reads a serialized string, stored as a big-endian length then its content
+
+ :param data: Bytes where to read the string from
+ :param length_fmt: Structure format of the string length (H or Q)
+ :return: A tuple (deserialized string, remaining data)
+ """
+ (length,), data = read_struct(data, ">{0}".format(length_fmt))
+ ba, data = data[:length], data[length:]
+ return to_unicode(ba), data
+
+
+# ------------------------------------------------------------------------------
+
+
+def java_data_fd(original_df):
+    # type: (IO[bytes]) -> IO[bytes]
+    """
+    Ensures that the input file descriptor contains a Java serialized content.
+    Automatically uncompresses GZipped data
+
+    :param original_df: Input file descriptor
+    :return: Input file descriptor or a fake one to access uncompressed data
+    :raise IOError: Error reading input file
+    """
+    # Peek at the first two bytes, then rewind to the initial position
+    start_idx = original_df.tell()
+    magic_header = [byte_to_int(x) for x in original_df.read(2)]  # type: ignore
+    original_df.seek(start_idx, os.SEEK_SET)
+
+    if magic_header[:2] == [0x1F, 0x8B]:
+        # GZip magic number: read through a decompressing wrapper
+        return gzip.GzipFile(fileobj=original_df, mode="rb")  # type: ignore
+
+    # Anything else is returned as is, already rewound to its start position:
+    # - a raw serialized stream (first byte 0xAC) is used directly
+    # - unknown content is left to the parser, which will raise the error
+    # - an empty or 1-byte stream no longer raises IndexError here, as the
+    #   header is compared as a whole instead of being indexed
+    return original_df
+
+
+# ------------------------------------------------------------------------------
+
+
+def hexdump(src, start_offset=0, length=16):
+ # type: (str, int, int) -> str
+ """
+ Prepares an hexadecimal dump string
+
+ :param src: Binary data; converted with ``to_str`` using latin-1
+ :param start_offset: Offset added to the position printed on each line
+ :param length: Number of characters of the source shown on a dump line
+ :return: The dump: one "position, hexadecimal values, text" line per chunk
+ """
+ hex_filter = "".join(
+ (len(repr(chr(x))) == 3) and chr(x) or "." for x in range(256)
+ )
+ pattern = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3)
+
+ # Convert raw data to str (Python 3 compatibility)
+ src = to_str(src, "latin-1")
+
+ result = []
+ for i in range(0, len(src), length):
+ s = src[i : i + length]
+ hexa = " ".join("{0:02X}".format(ord(x)) for x in s)
+ printable = s.translate(hex_filter)
+ result.append(pattern.format(i + start_offset, hexa, printable))
+
+ return "".join(result)
+
+
+# ------------------------------------------------------------------------------
+
+
+if sys.version_info[0] >= 3:
+ BYTES_TYPE = bytes # pylint:disable=C0103
+ UNICODE_TYPE = str # pylint:disable=C0103
+ unicode_char = chr # pylint:disable=C0103
+
+ def bytes_char(c):
+ """
+ Converts the given character to a bytes string
+ """
+ return bytes((c,))
+
+ # Python 3 interpreter : bytes & str
+ def to_bytes(data, encoding="UTF-8"):
+ """
+ Converts the given string to an array of bytes.
+ Returns the first parameter if it is already an array of bytes.
+
+ :param data: A unicode string
+ :param encoding: The encoding of data
+ :return: The corresponding array of bytes
+ """
+ if type(data) is bytes: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ return data.encode(encoding)
+
+ def to_str(data, encoding="UTF-8"):
+ """
+ Converts the given parameter to a string.
+ Returns the first parameter if it is already an instance of ``str``.
+
+ :param data: A string
+ :param encoding: The encoding of data
+ :return: The corresponding string
+ """
+ if type(data) is str: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ try:
+ return str(data, encoding)
+ except UnicodeDecodeError:
+ return decode_modified_utf8(data)[0]
+
+ # Same operation
+ to_unicode = to_str # pylint:disable=C0103
+
+ def read_to_str(data):
+ """
+ Concats all bytes into a string
+ """
+ return "".join(chr(char) for char in data)
+
+
+else:
+ BYTES_TYPE = str # pylint:disable=C0103
+ UNICODE_TYPE = (
+ unicode # pylint:disable=C0103,undefined-variable # noqa: F821
+ )
+ unicode_char = (
+ unichr # pylint:disable=C0103,undefined-variable # noqa: F821
+ )
+ bytes_char = chr # pylint:disable=C0103
+
+ # Python 2 interpreter : str & unicode
+ def to_str(data, encoding="UTF-8"):
+ """
+ Converts the given parameter to a string.
+ Returns the first parameter if it is already an instance of ``str``.
+
+ :param data: A string
+ :param encoding: The encoding of data
+ :return: The corresponding string
+ """
+ if type(data) is str: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ return data.encode(encoding)
+
+ # Same operation
+ to_bytes = to_str # pylint:disable=C0103
+
+ # Python 2 interpreter : str & unicode
+ def to_unicode(data, encoding="UTF-8"):
+ """
+ Converts the given byte string to a ``unicode`` text (Python 2 only).
+ Returns the first parameter if it is already a ``unicode`` instance.
+
+ :param data: A byte string (``str``) or a ``unicode`` text
+ :param encoding: The encoding of data; Modified UTF-8 is tried if it fails
+ :return: The corresponding ``unicode`` text
+ """
+ if type(data) is UNICODE_TYPE: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ try:
+ return data.decode(encoding)
+ except UnicodeDecodeError:
+ return decode_modified_utf8(data)[0]
+
+ def read_to_str(data):
+ """
+ Nothing to do in Python 2
+ """
+ return data
diff --git a/javaobj/v1/__init__.py b/javaobj/v1/__init__.py
new file mode 100644
index 0000000..cc4aaaa
--- /dev/null
+++ b/javaobj/v1/__init__.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+"""
+First version of the un-marshalling process of javaobj.
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from . import beans, core, transformers # noqa: F401
+from .core import ( # noqa: F401
+ load,
+ loads,
+ dumps,
+ JavaObjectMarshaller,
+ JavaObjectUnmarshaller,
+)
+from .transformers import DefaultObjectTransformer # noqa: F401
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
diff --git a/javaobj/v1/beans.py b/javaobj/v1/beans.py
new file mode 100644
index 0000000..bf867bb
--- /dev/null
+++ b/javaobj/v1/beans.py
@@ -0,0 +1,225 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Definition of the beans of the v1 parser
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+from typing import List
+import struct
+
+from ..utils import UNICODE_TYPE
+
+# ------------------------------------------------------------------------------
+
+__all__ = (
+ "JavaArray",
+ "JavaByteArray",
+ "JavaClass",
+ "JavaEnum",
+ "JavaObject",
+ "JavaString",
+)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaClass(object): # pylint:disable=R0205
+ """
+ Represents a class in the Java world: name, UID, flags, fields and parent
+ """
+
+ def __init__(self):
+ """
+ Sets up members with empty values (None or an empty list)
+ """
+ self.name = None # type: str
+ self.serialVersionUID = None # type: int # pylint:disable=C0103
+ self.flags = None # type: int
+ self.fields_names = [] # type: List[str]
+ self.fields_types = [] # type: List[JavaString]
+ self.superclass = None # type: JavaClass
+
+ def __str__(self):
+ """
+ String representation of the Java class (same as ``__repr__``)
+ """
+ return self.__repr__()
+
+ def __repr__(self):
+ """
+ String representation of the Java class: "[name:0xUID]", UID in hexadecimal
+ """
+ return "[{0:s}:0x{1:X}]".format(self.name, self.serialVersionUID)
+
+ def __eq__(self, other):
+ """
+ Equality test between two Java classes
+
+ :param other: Other JavaClass to test
+ :return: True if name, UID, flags, fields and superclass are all equal
+ """
+ if not isinstance(other, type(self)):
+ return False
+
+ return (
+ self.name == other.name
+ and self.serialVersionUID == other.serialVersionUID
+ and self.flags == other.flags
+ and self.fields_names == other.fields_names
+ and self.fields_types == other.fields_types
+ and self.superclass == other.superclass
+ )
+
+
+class JavaObject(object):  # pylint:disable=R0205
+    """
+    Represents a deserialized non-primitive Java object
+    """
+
+    def __init__(self):
+        """
+        Sets up members
+        """
+        self.classdesc = None  # type: JavaClass
+        self.annotations = []
+
+    def get_class(self):
+        """
+        Returns the JavaClass that defines the type of this object
+        """
+        return self.classdesc
+
+    def __str__(self):
+        """
+        String representation (same as ``__repr__``)
+        """
+        return self.__repr__()
+
+    def __repr__(self):
+        """
+        String representation: "<javaobj:NAME>", NAME being the class name
+        """
+        name = "UNKNOWN"
+        if self.classdesc:
+            name = self.classdesc.name
+        return "<javaobj:{0}>".format(name)
+
+    def __hash__(self):
+        """
+        Each JavaObject we load must have a hash method to be accepted in sets
+        and alike. The default hash is the memory address of the object.
+        """
+        return id(self)
+
+    def __eq__(self, other):
+        """
+        Equality test between two Java objects
+
+        :param other: Other JavaObject to test
+        :return: True if class, annotations and declared fields are all equal
+        """
+        if not isinstance(other, type(self)):
+            return False
+
+        same_base = (
+            self.classdesc == other.classdesc
+            and self.annotations == other.annotations
+        )
+        if not same_base:
+            return False
+
+        # An object without class description (e.g. a bare JavaObject) has no
+        # declared field to compare: reading fields_names from None would fail
+        names = self.classdesc.fields_names if self.classdesc else ()
+        return all(getattr(self, n) == getattr(other, n) for n in names)
+
+
+class JavaString(UNICODE_TYPE):
+ """
+ Represents a Java String
+ """
+
+ def __hash__(self):
+ return UNICODE_TYPE.__hash__(self)
+
+ def __eq__(self, other):
+ if not isinstance(other, UNICODE_TYPE):
+ return False
+ return UNICODE_TYPE.__eq__(self, other)
+
+
+class JavaEnum(JavaObject):
+ """
+ Represents a Java enumeration
+ """
+
+ def __init__(self, constant=None):
+ super(JavaEnum, self).__init__()
+ self.constant = constant
+
+
+class JavaArray(list, JavaObject):
+    """
+    Represents a Java Array: a list carrying the description of its Java class
+    """
+
+    def __init__(self, classdesc=None):
+        list.__init__(self)
+        JavaObject.__init__(self)
+        self.classdesc = classdesc
+
+    def __hash__(self):
+        return JavaObject.__hash__(self)  # list.__hash__ is None in Python 3
+
+
+class JavaByteArray(JavaObject):
+ """
+ Represents the special case of Java Array which contains bytes
+ """
+
+ def __init__(self, data, classdesc=None):
+ JavaObject.__init__(self)
+ self._data = struct.unpack("b" * len(data), data)
+ self.classdesc = classdesc
+
+ def __str__(self):
+ return "JavaByteArray({0})".format(self._data)
+
+ def __getitem__(self, item):
+ return self._data[item]
+
+ def __iter__(self):
+ return iter(self._data)
+
+ def __len__(self):
+ return len(self._data)
diff --git a/javaobj/v1/core.py b/javaobj/v1/core.py
new file mode 100644
index 0000000..ae5eeb5
--- /dev/null
+++ b/javaobj/v1/core.py
@@ -0,0 +1,133 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for reading and writing (writing is WIP currently) Java
+objects in the format used by ObjectOutputStream and ObjectInputStream. This
+form of object representation is a standard data interchange format in Java.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+try:
+ # Python 2
+ from StringIO import StringIO as BytesIO
+except ImportError:
+ # Python 3+
+ from io import BytesIO
+
+# Javaobj modules
+from .marshaller import JavaObjectMarshaller
+from .unmarshaller import JavaObjectUnmarshaller
+from .transformers import DefaultObjectTransformer
+from ..utils import java_data_fd
+
+# ------------------------------------------------------------------------------
+
+__all__ = (
+ "__version_info__",
+ "__version__",
+ "JavaObjectMarshaller",
+ "JavaObjectUnmarshaller",
+ "dumps",
+ "load",
+ "loads",
+)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+def load(file_object, *transformers, **kwargs):
+    """
+    Reads the Java primitive data and objects that an ObjectOutputStream
+    serialized into the given file-like object.
+
+    :param file_object: A file-like object
+    :param transformers: Custom transformers to use
+    :param ignore_remaining_data: If True, don't log an error when unused
+                                  trailing bytes are remaining
+    :param use_numpy_arrays: Flag given as is to the unmarshaller
+                             (False by default)
+    :return: The deserialized object
+    """
+    # Optional keyword arguments
+    use_numpy = kwargs.get("use_numpy_arrays", False)
+    ignore_trailing = kwargs.get("ignore_remaining_data", False)
+
+    # Check file format (uncompress if necessary)
+    stream = java_data_fd(file_object)
+    unmarshaller = JavaObjectUnmarshaller(stream, use_numpy)
+
+    # Custom transformers are added first, the default one comes last
+    for custom_transformer in transformers:
+        unmarshaller.add_transformer(custom_transformer)
+    unmarshaller.add_transformer(DefaultObjectTransformer())
+
+    # Read the file object
+    return unmarshaller.readObject(ignore_remaining_data=ignore_trailing)
+
+
+def loads(string, *transformers, **kwargs):
+    """
+    Same as load(), but reads the serialized Java objects and primitive data
+    from an in-memory string instead of a file-like object.
+
+    :param string: A Java data string
+    :param transformers: Custom transformers to use
+    :param ignore_remaining_data: If True, don't log an error when unused
+                                  trailing bytes are remaining
+    :return: The deserialized object
+    """
+    buffer = BytesIO(string)
+    return load(buffer, *transformers, **kwargs)
+
+
+def dumps(obj, *transformers):
+    """
+    Serializes Java primitive data and objects, previously unmarshaled by
+    load(s), into a string.
+
+    :param obj: A Python primitive object, or one loaded using load(s)
+    :param transformers: Custom transformers to use
+    :return: The serialized data as a string
+    """
+    serializer = JavaObjectMarshaller()
+
+    # Register the custom transformers, in the given order
+    for custom_transformer in transformers:
+        serializer.add_transformer(custom_transformer)
+    return serializer.dump(obj)
diff --git a/javaobj/v1/marshaller.py b/javaobj/v1/marshaller.py
new file mode 100644
index 0000000..9e5bdeb
--- /dev/null
+++ b/javaobj/v1/marshaller.py
@@ -0,0 +1,574 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for writing (writing is WIP currently) Java
+objects that will be deserialized by ObjectInputStream. This form of
+object representation is a standard data interchange format in Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+import collections
+import logging
+import struct
+
+try:
+ # Python 2
+ from StringIO import StringIO as BytesIO
+except ImportError:
+ # Python 3+
+ from io import BytesIO
+
+# Javaobj modules
+from .beans import (
+ JavaClass,
+ JavaString,
+ JavaObject,
+ JavaByteArray,
+ JavaEnum,
+ JavaArray,
+)
+from ..constants import (
+ StreamConstants,
+ ClassDescFlags,
+ TerminalCode,
+ TypeCode,
+)
+from ..utils import (
+ log_debug,
+ log_error,
+ to_bytes,
+ BYTES_TYPE,
+ UNICODE_TYPE,
+)
+
+# ------------------------------------------------------------------------------
+
+__all__ = ("JavaObjectMarshaller",)
+
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaObjectMarshaller:
+ """
+ Serializes objects into Java serialization format
+ """
+
+ def __init__(self, stream=None):
+ """
+ Sets up members
+
+ :param stream: An output stream
+ """
+ self.object_stream = stream
+ self.object_obj = None
+ self.object_transformers = []
+ self.references = []
+
+ def add_transformer(self, transformer):
+ """
+ Appends an object transformer to the serialization process
+
+ :param transformer: An object with a transform(obj) method
+ """
+ self.object_transformers.append(transformer)
+
+ def dump(self, obj):
+ """
+ Dumps the given object in the Java serialization format
+ """
+ self.references = []
+ self.object_obj = obj
+ self.object_stream = BytesIO()
+ self._writeStreamHeader()
+ self.writeObject(obj)
+ return self.object_stream.getvalue()
+
+ def _writeStreamHeader(self): # pylint:disable=C0103
+ """
+ Writes the Java serialization magic header in the serialization stream
+ """
+ self._writeStruct(
+ ">HH",
+ 4,
+ (StreamConstants.STREAM_MAGIC, StreamConstants.STREAM_VERSION),
+ )
+
+ def writeObject(self, obj): # pylint:disable=C0103
+ """
+ Appends an object to the serialization stream
+
+ :param obj: A string or a deserialized Java object
+ :raise RuntimeError: Unsupported type
+ """
+ log_debug("Writing object of type {0}".format(type(obj).__name__))
+ if isinstance(obj, JavaArray):
+ # Deserialized Java array
+ self.write_array(obj)
+ elif isinstance(obj, JavaByteArray):
+ # Deserialized Java byte array
+ self.write_array(obj)
+ elif isinstance(obj, JavaEnum):
+ # Deserialized Java Enum
+ self.write_enum(obj)
+ elif isinstance(obj, JavaObject):
+ # Deserialized Java object
+ self.write_object(obj)
+ elif isinstance(obj, JavaString):
+ # Deserialized String
+ self.write_string(obj)
+ elif isinstance(obj, JavaClass):
+ # Java class
+ self.write_class(obj)
+ elif obj is None:
+ # Null
+ self.write_null()
+ elif type(obj) is str: # pylint:disable=C0123
+ # String value
+ self.write_blockdata(obj)
+ else:
+ # Unhandled type
+ raise RuntimeError(
+ "Object serialization of type {0} is not "
+ "supported.".format(type(obj))
+ )
+
+ def _writeStruct(self, unpack, length, args): # pylint:disable=C0103
+ """
+ Appends data to the serialization stream
+
+ :param unpack: Struct format string
+ :param length: Unused
+ :param args: Struct arguments
+ """
+ ba = struct.pack(unpack, *args)
+ self.object_stream.write(ba)
+
+ def _writeString(self, obj, use_reference=True): # pylint:disable=C0103
+ """
+ Appends a string to the serialization stream
+
+ :param obj: String to serialize
+ :param use_reference: If True, allow writing a reference
+ """
+ # TODO: Convert to "modified UTF-8"
+ # http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8
+ string = to_bytes(obj, "utf-8")
+
+ if use_reference and isinstance(obj, JavaString):
+ try:
+ idx = self.references.index(obj)
+ except ValueError:
+ # First appearance of the string
+ self.references.append(obj)
+ logging.debug(
+ "*** Adding ref 0x%X for string: %s",
+ len(self.references)
+ - 1
+ + StreamConstants.BASE_REFERENCE_IDX,
+ obj,
+ )
+
+ self._writeStruct(">H", 2, (len(string),))
+ self.object_stream.write(string)
+ else:
+ # Write a reference to the previous type
+ logging.debug(
+ "*** Reusing ref 0x%X for string: %s",
+ idx + StreamConstants.BASE_REFERENCE_IDX,
+ obj,
+ )
+ self.write_reference(idx)
+ else:
+ self._writeStruct(">H", 2, (len(string),))
+ self.object_stream.write(string)
+
+ def write_string(self, obj, use_reference=True):
+ """
+ Writes a Java string with the TC_STRING type marker
+
+ :param obj: The string to print
+ :param use_reference: If True, allow writing a reference
+ """
+ if use_reference and isinstance(obj, JavaString):
+ try:
+ idx = self.references.index(obj)
+ except ValueError:
+ # String is not referenced: let _writeString store it
+ self._writeStruct(">B", 1, (TerminalCode.TC_STRING,))
+ self._writeString(obj, use_reference)
+ else:
+ # Reuse the referenced string
+ logging.debug(
+ "*** Reusing ref 0x%X for String: %s",
+ idx + StreamConstants.BASE_REFERENCE_IDX,
+ obj,
+ )
+ self.write_reference(idx)
+ else:
+ # Don't use references
+ self._writeStruct(">B", 1, (TerminalCode.TC_STRING,))
+ self._writeString(obj, use_reference)
+
+ def write_enum(self, obj):
+ """
+ Writes an Enum value
+
+ :param obj: A JavaEnum object
+ """
+ # FIXME: the output doesn't have the same references as the real
+ # serializable form
+ self._writeStruct(">B", 1, (TerminalCode.TC_ENUM,))
+
+ try:
+ idx = self.references.index(obj)
+ except ValueError:
+ # New reference
+ self.references.append(obj)
+ logging.debug(
+ "*** Adding ref 0x%X for enum: %s",
+ len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+ obj,
+ )
+
+ self.write_classdesc(obj.get_class())
+ else:
+ self.write_reference(idx)
+
+ self.write_string(obj.constant)
+
+    def write_blockdata(self, obj, parent=None):  # pylint:disable=W0613
+        """
+        Appends a block of data to the serialization stream
+
+        :param obj: Data block, as bytes or as a latin-1 encodable string
+        :param parent: Unused
+        """
+        if isinstance(obj, UNICODE_TYPE):
+            # Latin-1: keep bytes as is
+            obj = to_bytes(obj, "latin-1")
+
+        length = len(obj)
+        if length < 256:
+            # Small block: TC_BLOCKDATA (unsigned byte) (byte)[size]
+            # The size is packed in a single byte, i.e. it is 255 at most
+            self._writeStruct(">B", 1, (TerminalCode.TC_BLOCKDATA,))
+            self._writeStruct(">B", 1, (length,))
+        else:
+            # Large block: TC_BLOCKDATALONG (unsigned int) (byte)[size]
+            self._writeStruct(">B", 1, (TerminalCode.TC_BLOCKDATALONG,))
+            self._writeStruct(">I", 1, (length,))
+
+        self.object_stream.write(obj)
+
+ def write_null(self):
+ """
+ Writes a "null" value
+ """
+ self._writeStruct(">B", 1, (TerminalCode.TC_NULL,))
+
+ def write_object(self, obj, parent=None):
+ """
+ Writes an object (class description, fields, annotations) to the stream
+
+ :param obj: The JavaObject to write (custom transformers may replace it)
+ :param parent: Not yet used
+ """
+ # Transform object: the first transformer returning another object wins
+ for transformer in self.object_transformers:
+ tmp_object = transformer.transform(obj)
+ if tmp_object is not obj:
+ obj = tmp_object
+ break
+
+ self._writeStruct(">B", 1, (TerminalCode.TC_OBJECT,))
+ cls = obj.get_class()
+ self.write_classdesc(cls)
+
+ # Add reference (an empty list is stored, not the object itself)
+ self.references.append([])
+ logging.debug(
+ "*** Adding ref 0x%X for object %s",
+ len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+ obj,
+ )
+
+ all_names = collections.deque()
+ all_types = collections.deque()
+ tmpcls = cls
+ while tmpcls:
+ all_names.extendleft(reversed(tmpcls.fields_names))
+ all_types.extendleft(reversed(tmpcls.fields_types))
+ tmpcls = tmpcls.superclass
+ del tmpcls
+
+ logging.debug("<=> Field names: %s", all_names)
+ logging.debug("<=> Field types: %s", all_types)
+
+ for field_name, field_type in zip(all_names, all_types):
+ try:
+ logging.debug(
+ "Writing field %s (%s): %s",
+ field_name,
+ field_type,
+ getattr(obj, field_name),
+ )
+ self._write_value(field_type, getattr(obj, field_name))
+ except AttributeError as ex:
+ log_error(
+ "No attribute {0} for object {1}\nDir: {2}".format(
+ ex, repr(obj), dir(obj)
+ )
+ )
+ raise
+ del all_names, all_types
+
+ if (
+ cls.flags & ClassDescFlags.SC_SERIALIZABLE
+ and cls.flags & ClassDescFlags.SC_WRITE_METHOD
+ or cls.flags & ClassDescFlags.SC_EXTERNALIZABLE
+ and cls.flags & ClassDescFlags.SC_BLOCK_DATA
+ ):
+ for annotation in obj.annotations:
+ log_debug(
+ "Write annotation {0} for {1}".format(
+ repr(annotation), repr(obj)
+ )
+ )
+ if annotation is None:
+ self.write_null()
+ else:
+ self.writeObject(annotation)
+ self._writeStruct(">B", 1, (TerminalCode.TC_ENDBLOCKDATA,))
+
+ def write_class(self, obj, parent=None): # pylint:disable=W0613
+ """
+ Writes a class to the stream
+
+ :param obj: A JavaClass object
+ :param parent:
+ """
+ self._writeStruct(">B", 1, (TerminalCode.TC_CLASS,))
+ self.write_classdesc(obj)
+
+ def write_classdesc(self, obj, parent=None): # pylint:disable=W0613
+ """
+ Writes a class description
+
+ :param obj: Class description to write
+ :param parent:
+ """
+ if obj not in self.references:
+ # Add reference
+ self.references.append(obj)
+ logging.debug(
+ "*** Adding ref 0x%X for classdesc %s",
+ len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+ obj.name,
+ )
+
+ self._writeStruct(">B", 1, (TerminalCode.TC_CLASSDESC,))
+ self._writeString(obj.name)
+ self._writeStruct(">qB", 1, (obj.serialVersionUID, obj.flags))
+ self._writeStruct(">H", 1, (len(obj.fields_names),))
+
+ for field_name, field_type in zip(
+ obj.fields_names, obj.fields_types
+ ):
+ self._writeStruct(
+ ">B", 1, (self._convert_type_to_char(field_type),)
+ )
+ self._writeString(field_name)
+ if ord(field_type[0]) in (
+ TypeCode.TYPE_OBJECT,
+ TypeCode.TYPE_ARRAY,
+ ):
+ try:
+ idx = self.references.index(field_type)
+ except ValueError:
+ # First appearance of the type
+ self.references.append(field_type)
+ logging.debug(
+ "*** Adding ref 0x%X for field type %s",
+ len(self.references)
+ - 1
+ + StreamConstants.BASE_REFERENCE_IDX,
+ field_type,
+ )
+
+ self.write_string(field_type, False)
+ else:
+ # Write a reference to the previous type
+ logging.debug(
+ "*** Reusing ref 0x%X for %s (%s)",
+ idx + StreamConstants.BASE_REFERENCE_IDX,
+ field_type,
+ field_name,
+ )
+ self.write_reference(idx)
+
+ self._writeStruct(">B", 1, (TerminalCode.TC_ENDBLOCKDATA,))
+ if obj.superclass:
+ self.write_classdesc(obj.superclass)
+ else:
+ self.write_null()
+ else:
+ # Use reference
+ self.write_reference(self.references.index(obj))
+
+ def write_reference(self, ref_index):
+ """
+ Writes a reference
+ :param ref_index: Local index (0-based) to the reference
+ """
+ self._writeStruct(
+ ">BL",
+ 1,
+ (
+ TerminalCode.TC_REFERENCE,
+ ref_index + StreamConstants.BASE_REFERENCE_IDX,
+ ),
+ )
+
+    def write_array(self, obj):
+        """
+        Writes a Java array: class description, size, then each item
+
+        :param obj: A JavaArray or JavaByteArray object
+        """
+        classdesc = obj.get_class()
+        self._writeStruct(">B", 1, (TerminalCode.TC_ARRAY,))
+        self.write_classdesc(classdesc)
+        self._writeStruct(">i", 1, (len(obj),))
+
+        # Add reference
+        self.references.append(obj)
+        logging.debug(
+            "*** Adding ref 0x%X for array []",
+            len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+        )
+
+        array_type_code = TypeCode(ord(classdesc.name[0]))
+        assert array_type_code == TypeCode.TYPE_ARRAY
+        type_code = TypeCode(ord(classdesc.name[1]))
+
+        if type_code in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+            # Objects and sub-arrays both go through _write_value, as it
+            # handles null items (write_array alone fails on None)
+            item_type = classdesc.name[1:]
+            for item in obj:
+                self._write_value(item_type, item)
+        else:
+            log_debug("Write array of type {0}".format(chr(type_code.value)))
+            for v in obj:
+                log_debug("Writing: %s" % v)
+                self._write_value(type_code, v)
+
+ def _write_value(self, raw_field_type, value):
+ """
+ Writes an item of an array
+
+ :param raw_field_type: Value type
+ :param value: The value itself
+ """
+ if isinstance(raw_field_type, (TypeCode, int)):
+ field_type = raw_field_type
+ else:
+ # We don't need details for arrays and objects
+ field_type = TypeCode(ord(raw_field_type[0]))
+
+ if field_type == TypeCode.TYPE_BOOLEAN:
+ self._writeStruct(">B", 1, (1 if value else 0,))
+ elif field_type == TypeCode.TYPE_BYTE:
+ self._writeStruct(">b", 1, (value,))
+ elif field_type == TypeCode.TYPE_CHAR:
+ self._writeStruct(">H", 1, (ord(value),))
+ elif field_type == TypeCode.TYPE_SHORT:
+ self._writeStruct(">h", 1, (value,))
+ elif field_type == TypeCode.TYPE_INTEGER:
+ self._writeStruct(">i", 1, (value,))
+ elif field_type == TypeCode.TYPE_LONG:
+ self._writeStruct(">q", 1, (value,))
+ elif field_type == TypeCode.TYPE_FLOAT:
+ self._writeStruct(">f", 1, (value,))
+ elif field_type == TypeCode.TYPE_DOUBLE:
+ self._writeStruct(">d", 1, (value,))
+ elif field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+ if value is None:
+ self.write_null()
+ elif isinstance(value, JavaEnum):
+ self.write_enum(value)
+ elif isinstance(value, (JavaArray, JavaByteArray)):
+ self.write_array(value)
+ elif isinstance(value, JavaObject):
+ self.write_object(value)
+ elif isinstance(value, JavaString):
+ self.write_string(value)
+ elif isinstance(value, JavaClass):
+ self.write_class(value)
+ elif isinstance(value, (BYTES_TYPE, UNICODE_TYPE)):
+ self.write_blockdata(value)
+ else:
+ raise RuntimeError("Unknown typecode: {0}".format(field_type))
+ else:
+ raise RuntimeError("Unknown typecode: {0}".format(field_type))
+
+    @staticmethod
+    def _convert_type_to_char(type_char):
+        """
+        Converts the given type code to an int (RuntimeError if unsupported)
+
+        :param type_char: A TypeCode, an int or a type code character
+        """
+        if isinstance(type_char, TypeCode):
+            return type_char.value
+
+        if isinstance(type_char, int):
+            return type_char
+
+        if isinstance(type_char, (BYTES_TYPE, UNICODE_TYPE)):
+            # Slice, don't index: bytes[0] is an int in Python 3 (ord fails).
+            # Conversion to TypeCode raises an error if the type is invalid
+            return TypeCode(ord(type_char[:1])).value
+
+        raise RuntimeError(
+            "Typecode {0} ({1}) isn't supported.".format(
+                type_char, type(type_char).__name__
+            )
+        )
diff --git a/javaobj/v1/transformers.py b/javaobj/v1/transformers.py
new file mode 100644
index 0000000..c581125
--- /dev/null
+++ b/javaobj/v1/transformers.py
@@ -0,0 +1,392 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Implementation of the object transformers in v1 parser
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+from typing import Callable, Dict
+import functools
+
+from .beans import JavaClass, JavaObject
+from .unmarshaller import JavaObjectUnmarshaller
+from ..constants import ClassDescFlags, TerminalCode, TypeCode
+from ..utils import (
+ log_debug,
+ log_error,
+ to_bytes,
+ read_struct,
+ read_string,
+)
+
+
+__all__ = ("DefaultObjectTransformer",)
+
+
+class DefaultObjectTransformer(object): # pylint:disable=R0205
+ """
+ Default transformer for the deserialized objects.
+ Converts JavaObject objects to Python types (maps, lists, ...)
+ """
+
+    class JavaList(list, JavaObject):
+        """Python-Java list bridge type"""
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            list.__init__(self)
+            JavaObject.__init__(self)
+
+        def __hash__(self):
+            # list.__hash__ is None (lists are unhashable): calling it raised
+            # a TypeError. Use the identity hash of JavaObject instead.
+            return JavaObject.__hash__(self)
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Loads the content of the list, written with a custom implementation
+            """
+            # Lists have their content in their annotations (first one skipped)
+            self.extend(self.annotations[1:])
+
+ @functools.total_ordering
+ class JavaPrimitiveClass(JavaObject):
+ """
+ Parent of Java classes matching a primitive (Bool, Integer, Long, ...)
+ """
+
+ def __init__(self, unmarshaller):
+ JavaObject.__init__(self)
+ self.value = None
+
+ def __str__(self):
+ return str(self.value)
+
+ def __repr__(self):
+ return repr(self.value)
+
+ def __hash__(self):
+ return hash(self.value)
+
+ def __eq__(self, other):
+ return self.value == other
+
+ def __lt__(self, other):
+ return self.value < other
+
+ class JavaBool(JavaPrimitiveClass):
+ def __bool__(self):
+ return self.value
+
+ class JavaInt(JavaPrimitiveClass):
+ def __int__(self):
+ return self.value
+
+    class JavaMap(dict, JavaObject):
+        """Python-Java dictionary/map bridge type"""
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            dict.__init__(self)
+            JavaObject.__init__(self)
+
+        def __hash__(self):
+            # dict.__hash__ is None (dicts are unhashable): calling it raised
+            # a TypeError. Use the identity hash of JavaObject instead.
+            return JavaObject.__hash__(self)
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Loads the content of the map, written with a custom implementation
+            """
+            # Group annotation elements 2 by 2: key, then value
+            pairs = iter(self.annotations[1:])
+            for key, value in zip(pairs, pairs):
+                self[key] = value
+
+ class JavaLinkedHashMap(JavaMap):
+ def __extra_loading__(self, unmarshaller, ident=0):
+ # type: (JavaObjectUnmarshaller, int) -> None
+ """
+ Loads the content of the map, written with a custom implementation
+ """
+ # Ignore the blockdata opid
+ (opid,) = unmarshaller._readStruct(">B")
+ if opid != ClassDescFlags.SC_BLOCK_DATA:
+ raise ValueError("Start of block data not found")
+
+ # Read HashMap fields
+ self.buckets = unmarshaller._read_value(
+ TypeCode.TYPE_INTEGER, ident
+ )
+ self.size = unmarshaller._read_value(TypeCode.TYPE_INTEGER, ident)
+
+ # Read entries
+ for _ in range(self.size):
+ key = unmarshaller._read_and_exec_opcode()[1]
+ value = unmarshaller._read_and_exec_opcode()[1]
+ self[key] = value
+
+ # Ignore the end of the blockdata
+ unmarshaller._read_and_exec_opcode(
+ ident, [TerminalCode.TC_ENDBLOCKDATA]
+ )
+
+ # Ignore the trailing 0
+ (opid,) = unmarshaller._readStruct(">B")
+ if opid != 0:
+ raise ValueError("Should find 0x0, got {0:x}".format(opid))
+
+    class JavaSet(set, JavaObject):
+        """Python-Java set bridge type"""
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            set.__init__(self)
+            JavaObject.__init__(self)
+
+        def __hash__(self):
+            # set.__hash__ is None (sets are unhashable): calling it raised
+            # a TypeError. Use the identity hash of JavaObject instead.
+            return JavaObject.__hash__(self)
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Loads the content of the set, written with a custom implementation
+            """
+            self.update(self.annotations[1:])
+
+ class JavaTreeSet(JavaSet):
+ def __extra_loading__(self, unmarshaller, ident=0):
+ # type: (JavaObjectUnmarshaller, int) -> None
+ """
+ Loads the content of the set, written with a custom implementation
+ """
+ # Annotation[1] == size of the set: items start at annotation[2]
+ self.update(self.annotations[2:])
+
+ class JavaTime(JavaObject):
+ """
+ Represents the classes found in the java.time package
+
+ The semantic of the fields depends on the type of time that has been
+ parsed
+ """
+
+ DURATION_TYPE = 1
+ INSTANT_TYPE = 2
+ LOCAL_DATE_TYPE = 3
+ LOCAL_TIME_TYPE = 4
+ LOCAL_DATE_TIME_TYPE = 5
+ ZONE_DATE_TIME_TYPE = 6
+ ZONE_REGION_TYPE = 7
+ ZONE_OFFSET_TYPE = 8
+ OFFSET_TIME_TYPE = 9
+ OFFSET_DATE_TIME_TYPE = 10
+ YEAR_TYPE = 11
+ YEAR_MONTH_TYPE = 12
+ MONTH_DAY_TYPE = 13
+ PERIOD_TYPE = 14
+
+ def __init__(self, unmarshaller):
+ # type: (JavaObjectUnmarshaller) -> None
+ JavaObject.__init__(self)
+ self.type = -1
+ self.year = None
+ self.month = None
+ self.day = None
+ self.hour = None
+ self.minute = None
+ self.second = None
+ self.nano = None
+ self.offset = None
+ self.zone = None
+
+ self.time_handlers = {
+ self.DURATION_TYPE: self.do_duration,
+ self.INSTANT_TYPE: self.do_instant,
+ self.LOCAL_DATE_TYPE: self.do_local_date,
+ self.LOCAL_DATE_TIME_TYPE: self.do_local_date_time,
+ self.LOCAL_TIME_TYPE: self.do_local_time,
+ self.ZONE_DATE_TIME_TYPE: self.do_zoned_date_time,
+ self.ZONE_OFFSET_TYPE: self.do_zone_offset,
+ self.ZONE_REGION_TYPE: self.do_zone_region,
+ self.OFFSET_TIME_TYPE: self.do_offset_time,
+ self.OFFSET_DATE_TIME_TYPE: self.do_offset_date_time,
+ self.YEAR_TYPE: self.do_year,
+ self.YEAR_MONTH_TYPE: self.do_year_month,
+ self.MONTH_DAY_TYPE: self.do_month_day,
+ self.PERIOD_TYPE: self.do_period,
+ }
+
+        def __str__(self):
+            return (
+                "JavaTime(type={s.type:#x}, "  # hexadecimal with 0x prefix
+                "year={s.year}, month={s.month}, day={s.day}, "
+                "hour={s.hour}, minute={s.minute}, second={s.second}, "
+                "nano={s.nano}, offset={s.offset}, zone={s.zone})"
+            ).format(s=self)
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """Loads the time fields from the data kept in the first annotation"""
+            # Convert back annotations to bytes
+            # latin-1 is used to ensure that bytes are kept as is
+            content = to_bytes(self.annotations[0], "latin1")
+            (self.type,), content = read_struct(content, ">b")
+
+            # Look up first: a KeyError raised by a handler must not be hidden
+            handler = self.time_handlers.get(self.type)
+            if handler is None:
+                log_error("Unhandled kind of time: {}".format(self.type))
+            else:
+                handler(unmarshaller, content)
+
+ def do_duration(self, unmarshaller, data):
+ (self.second, self.nano), data = read_struct(data, ">qi")
+ return data
+
+ def do_instant(self, unmarshaller, data):
+ (self.second, self.nano), data = read_struct(data, ">qi")
+ return data
+
+ def do_local_date(self, unmarshaller, data):
+ (self.year, self.month, self.day), data = read_struct(data, ">ibb")
+ return data
+
+ def do_local_time(self, unmarshaller, data):
+ (hour,), data = read_struct(data, ">b")
+ minute = 0
+ second = 0
+ nano = 0
+
+ if hour < 0:
+ hour = ~hour
+ else:
+ (minute,), data = read_struct(data, ">b")
+ if minute < 0:
+ minute = ~minute
+ else:
+ (second,), data = read_struct(data, ">b")
+ if second < 0:
+ second = ~second
+ else:
+ (nano,), data = read_struct(data, ">i")
+
+ self.hour = hour
+ self.minute = minute
+ self.second = second
+ self.nano = nano
+ return data
+
+ def do_local_date_time(self, unmarshaller, data):
+ data = self.do_local_date(unmarshaller, data)
+ data = self.do_local_time(unmarshaller, data)
+ return data
+
+ def do_zoned_date_time(self, unmarshaller, data):
+ data = self.do_local_date_time(unmarshaller, data)
+ data = self.do_zone_offset(unmarshaller, data)
+ data = self.do_zone_region(unmarshaller, data)
+ return data
+
+ def do_zone_offset(self, unmarshaller, data):
+ (offset_byte,), data = read_struct(data, ">b")
+ if offset_byte == 127:
+ (self.offset,), data = read_struct(data, ">i")
+ else:
+ self.offset = offset_byte * 900
+ return data
+
+ def do_zone_region(self, unmarshaller, data):
+ self.zone, data = read_string(data)
+ return data
+
+ def do_offset_time(self, unmarshaller, data):
+ data = self.do_local_time(unmarshaller, data)
+ data = self.do_zone_offset(unmarshaller, data)
+ return data
+
+ def do_offset_date_time(self, unmarshaller, data):
+ data = self.do_local_date_time(unmarshaller, data)
+ data = self.do_zone_offset(unmarshaller, data)
+ return data
+
+ def do_year(self, unmarshaller, data):
+ (self.year,), data = read_struct(data, ">i")
+ return data
+
+ def do_year_month(self, unmarshaller, data):
+ (self.year, self.month), data = read_struct(data, ">ib")
+ return data
+
+ def do_month_day(self, unmarshaller, data):
+ (self.month, self.day), data = read_struct(data, ">bb")
+ return data
+
+ def do_period(self, unmarshaller, data):
+ (self.year, self.month, self.day), data = read_struct(data, ">iii")
+ return data
+
+ TYPE_MAPPER = {
+ "java.util.ArrayList": JavaList,
+ "java.util.LinkedList": JavaList,
+ "java.util.HashMap": JavaMap,
+ "java.util.LinkedHashMap": JavaLinkedHashMap,
+ "java.util.TreeMap": JavaMap,
+ "java.util.HashSet": JavaSet,
+ "java.util.LinkedHashSet": JavaSet,
+ "java.util.TreeSet": JavaTreeSet,
+ "java.time.Ser": JavaTime,
+ "java.lang.Boolean": JavaBool,
+ "java.lang.Integer": JavaInt,
+ "java.lang.Long": JavaInt,
+ } # type: Dict[str, Callable[[JavaObjectUnmarshaller], JavaObject]]
+
+    def create(self, classdesc, unmarshaller):
+        # type: (JavaClass, JavaObjectUnmarshaller) -> JavaObject
+        """
+        Creates the Python object that will hold an instance of a Java class
+
+        :param classdesc: The description of a Java class
+        :param unmarshaller: The unmarshaller, given to the mapped type
+        :return: An instance of the type mapped to the class name in
+                 TYPE_MAPPER, or a plain JavaObject if there is none
+        """
+        try:
+            mapped_type = self.TYPE_MAPPER[classdesc.name]
+        except KeyError:
+            # No bridge type for this class: return a JavaObject by default
+            return JavaObject()
+
+        log_debug("---")
+        log_debug(classdesc.name)
+        log_debug("---")
+        bridge = mapped_type(unmarshaller)
+        log_debug(">>> java_object: {0}".format(bridge))
+        return bridge
diff --git a/javaobj/v1/unmarshaller.py b/javaobj/v1/unmarshaller.py
new file mode 100644
index 0000000..c3c7709
--- /dev/null
+++ b/javaobj/v1/unmarshaller.py
@@ -0,0 +1,853 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for reading Java objects serialized by ObjectOutputStream.
+This form of object representation is a standard data interchange format in
+the Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+from typing import Any, Union
+import os
+import struct
+
+# Javaobj modules
+from .beans import (
+ JavaClass,
+ JavaString,
+ JavaObject,
+ JavaByteArray,
+ JavaEnum,
+ JavaArray,
+)
+from ..constants import (
+ StreamConstants,
+ ClassDescFlags,
+ TerminalCode,
+ TypeCode,
+ StreamCodeDebug,
+)
+from ..utils import (
+ log_debug,
+ log_error,
+ read_to_str,
+ to_unicode,
+ unicode_char,
+ hexdump,
+)
+
+numpy = None # Imported only when really used
+
+# ------------------------------------------------------------------------------
+
+__all__ = ("JavaObjectUnmarshaller",)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+# Conversion of a Java type char to its NumPy equivalent.
+# Sizes are given explicitly where the NumPy type character is ambiguous, so
+# that they match what _read_value() unpacks for the same Java types:
+# - a Java char is 2 bytes, unsigned (read as ">H")
+# - a Java long is 8 bytes, signed (read as ">q"); the "l" character is a
+#   C long, which is only 4 bytes on some platforms (e.g. Windows)
+NUMPY_TYPE_MAP = {
+    TypeCode.TYPE_BYTE: "B",
+    TypeCode.TYPE_CHAR: ">u2",
+    TypeCode.TYPE_DOUBLE: ">d",
+    TypeCode.TYPE_FLOAT: ">f",
+    TypeCode.TYPE_INTEGER: ">i",
+    TypeCode.TYPE_LONG: ">i8",
+    TypeCode.TYPE_SHORT: ">h",
+    TypeCode.TYPE_BOOLEAN: ">B",
+}
+
+# ------------------------------------------------------------------------------
+
+
+class JavaObjectUnmarshaller:
+ """
+ Deserializes a Java serialization stream
+ """
+
+ def __init__(self, stream, use_numpy_arrays=False):
+ """
+ Sets up members, then reads and checks the stream header
+
+ :param stream: An input stream (opened in binary/bytes mode)
+ :param use_numpy_arrays: If True, try to import numpy so that do_array()
+ can load arrays of primitive values as numpy arrays
+ :raise IOError: No stream given, or invalid stream header
+ :raise RuntimeError: The stream ended before the header could be read
+ """
+ self.use_numpy_arrays = use_numpy_arrays
+
+ # Numpy array support
+ if self.use_numpy_arrays:
+ try:
+ global numpy
+ import numpy as np
+
+ numpy = np
+ except ImportError:
+ # numpy is optional: the module-level name stays None, and
+ # do_array() only takes its numpy branch when it is not None
+ pass
+
+ # Check stream
+ if stream is None:
+ raise IOError("No input stream given")
+
+ # Prepare the association Terminal Symbol -> Reading method
+ # (every handler is called with the log indentation as "ident" keyword)
+ self.opmap = {
+ TerminalCode.TC_NULL: self.do_null,
+ TerminalCode.TC_CLASSDESC: self.do_classdesc,
+ TerminalCode.TC_OBJECT: self.do_object,
+ TerminalCode.TC_STRING: self.do_string,
+ TerminalCode.TC_LONGSTRING: self.do_string_long,
+ TerminalCode.TC_ARRAY: self.do_array,
+ TerminalCode.TC_CLASS: self.do_class,
+ TerminalCode.TC_BLOCKDATA: self.do_blockdata,
+ TerminalCode.TC_BLOCKDATALONG: self.do_blockdata_long,
+ TerminalCode.TC_REFERENCE: self.do_reference,
+ TerminalCode.TC_ENUM: self.do_enum,
+ # note that we are reusing do_null:
+ TerminalCode.TC_ENDBLOCKDATA: self.do_null,
+ }
+
+ # Set up members
+ self.current_object = None
+ self.reference_counter = 0
+ # Objects that can be the target of a TC_REFERENCE, in reading order
+ self.references = []
+ # Transformers asked, in order, to create the Python form of objects
+ self.object_transformers = []
+ self.object_stream = stream
+
+ # Read the stream header (magic & version)
+ # (must stay last: it reads from self.object_stream)
+ self._readStreamHeader()
+
+ def readObject(self, ignore_remaining_data=False):
+ """
+ Reads an object from the input stream
+
+ The stream position is restored to the end of the read object before
+ returning, even though the trailing bytes are read to be checked.
+
+ :param ignore_remaining_data: If True, don't log an error when
+ unused trailing bytes are remaining
+ :return: The unmarshalled object
+ :raise Exception: Any exception that occurred during unmarshalling
+ """
+ try:
+ # TODO: add expects
+ _, res = self._read_and_exec_opcode(ident=0)
+
+ # Look at what follows the object: remember where we are, read
+ # everything that is left, and seek back once it has been checked
+ position_bak = self.object_stream.tell()
+ the_rest = self.object_stream.read()
+ if not ignore_remaining_data and len(the_rest) != 0:
+ log_error(
+ "Warning!!!!: Stream still has {0} bytes left. "
+ "Enable debug mode of logging to see the hexdump.".format(
+ len(the_rest)
+ )
+ )
+ log_debug("\n{0}".format(hexdump(the_rest)))
+ else:
+ log_debug("Java Object unmarshalled successfully!")
+
+ self.object_stream.seek(position_bak)
+ return res
+ except Exception:
+ # Log the state of the unmarshaller, then let the error propagate
+ self._oops_dump_state(ignore_remaining_data)
+ raise
+
+ def add_transformer(self, transformer):
+ """
+ Appends an object transformer to the deserialization process
+
+ Transformers are tried in registration order by do_object(), which
+ keeps the first result that is not None.
+
+ :param transformer: An object with a create(classdesc, unmarshaller)
+ method
+ """
+ self.object_transformers.append(transformer)
+
+ def _readStreamHeader(self):
+     """
+     Checks that the stream starts with the Java serialization header
+
+     :raise IOError: Unexpected magic number or version (not a Java stream)
+     """
+     magic, version = self._readStruct(">HH")
+     expected_header = (
+         StreamConstants.STREAM_MAGIC,
+         StreamConstants.STREAM_VERSION,
+     )
+     if (magic, version) == expected_header:
+         return
+
+     raise IOError(
+         "The stream is not java serialized object. "
+         "Invalid stream header: {0:04X}{1:04X}".format(magic, version)
+     )
+
+ def _read_and_exec_opcode(self, ident=0, expect=None):
+     """
+     Reads one opcode byte from the stream and runs the handler
+     registered for it in the opcode map
+
+     :param ident: Log indentation level
+     :param expect: A list of expected opcodes (any opcode is accepted if
+                    empty or None)
+     :return: A tuple: (opcode, result of the handler)
+     :raise IOError: Read opcode is not one of the expected ones
+     :raise RuntimeError: Unknown opcode
+     """
+     position = self.object_stream.tell()
+     opid = self._readStruct(">B")[0]
+     log_debug(
+         "OpCode: 0x{0:X} -- {1} (at offset 0x{2:X})".format(
+             opid, StreamCodeDebug.op_id(opid), position
+         ),
+         ident,
+     )
+
+     is_unexpected = bool(expect) and opid not in expect
+     if is_unexpected:
+         raise IOError(
+             "Unexpected opcode 0x{0:X} -- {1} "
+             "(at offset 0x{2:X})".format(
+                 opid, StreamCodeDebug.op_id(opid), position
+             )
+         )
+
+     try:
+         handler = self.opmap[opid]
+     except KeyError:
+         raise RuntimeError(
+             "Unknown OpCode in the stream: 0x{0:X} "
+             "(at offset 0x{1:X})".format(opid, position)
+         )
+
+     # Handlers only receive the log indentation level
+     return opid, handler(ident=ident)
+
+ def _readStruct(self, unpack):
+     """
+     Reads exactly the bytes described by a struct format from the input
+     stream, and unpacks them
+
+     :param unpack: An unpack format string
+     :return: The result of struct.unpack (tuple)
+     :raise RuntimeError: End of stream reached during unpacking
+     """
+     expected_size = struct.calcsize(unpack)
+     raw = self.object_stream.read(expected_size)
+     if len(raw) == expected_size:
+         return struct.unpack(unpack, raw)
+
+     raise RuntimeError(
+         "Stream has been ended unexpectedly while unmarshaling."
+     )
+
+ def _readString(self, length_fmt="H"):
+     """
+     Reads a serialized string: a length prefix, followed by that many
+     bytes of content
+
+     :param length_fmt: Structure format of the string length (H or Q)
+     :return: The deserialized string
+     :raise RuntimeError: Unexpected end of stream
+     """
+     (length,) = self._readStruct(">{0}".format(length_fmt))
+     ba = self.object_stream.read(length)
+     if len(ba) != length:
+         # Same check as in _readStruct: a truncated stream must fail
+         # instead of being silently decoded as a shorter string
+         raise RuntimeError(
+             "Stream has been ended unexpectedly while unmarshaling."
+         )
+
+     return to_unicode(ba)
+
+ def do_classdesc(self, parent=None, ident=0):
+ """
+ Handles a TC_CLASSDESC opcode
+
+ Reads, in this order: the class name, the serialVersionUID and the
+ flags, the description of the fields, the class annotation (which must
+ be empty) and the description of the super class.
+
+ :param parent: If given (and truthy), object on which the names and
+ types of the read fields are also stored
+ :param ident: Log indentation level
+ :return: A JavaClass object
+ :raise AssertionError: The type of an array or object field is not a
+ JavaString
+ :raise NotImplementedError: The class annotation is not empty
+ """
+ # TC_CLASSDESC className serialVersionUID newHandle classDescInfo
+ # classDescInfo:
+ # classDescFlags fields classAnnotation superClassDesc
+ # classDescFlags:
+ # (byte) // Defined in Terminal Symbols and Constants
+ # fields:
+ # (short) fieldDesc[count]
+
+ # fieldDesc:
+ # primitiveDesc
+ # objectDesc
+ # primitiveDesc:
+ # prim_typecode fieldName
+ # objectDesc:
+ # obj_typecode fieldName className1
+ clazz = JavaClass()
+ log_debug("[classdesc]", ident)
+ class_name = self._readString()
+ clazz.name = class_name
+ log_debug("Class name: %s" % class_name, ident)
+
+ # serialVersionUID is a Java (signed) long => 8 bytes
+ serialVersionUID, classDescFlags = self._readStruct(">qB")
+ clazz.serialVersionUID = serialVersionUID
+ clazz.flags = classDescFlags
+
+ # The class description gets its handle before its fields are read
+ self._add_reference(clazz, ident)
+
+ log_debug(
+ "Serial: 0x{0:X} / {0:d} - classDescFlags: 0x{1:X} {2}".format(
+ serialVersionUID,
+ classDescFlags,
+ StreamCodeDebug.flags(classDescFlags),
+ ),
+ ident,
+ )
+ (length,) = self._readStruct(">H")
+ log_debug("Fields num: 0x{0:X}".format(length), ident)
+
+ # Names and types are kept in two lists, in the same order
+ clazz.fields_names = []
+ clazz.fields_types = []
+ for fieldId in range(length):
+ (typecode,) = self._readStruct(">B")
+ field_name = self._readString()
+ base_field_type = self._convert_char_to_type(typecode)
+
+ log_debug("> Reading field {0}".format(field_name), ident)
+
+ if base_field_type == TypeCode.TYPE_ARRAY:
+ # The type of an array field follows, as a string or a reference
+ _, field_type = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE),
+ )
+
+ if type(field_type) is not JavaString: # pylint:disable=C0123
+ raise AssertionError(
+ "Field type must be a JavaString, "
+ "not {0}".format(type(field_type))
+ )
+
+ elif base_field_type == TypeCode.TYPE_OBJECT:
+ # The class name of an object field follows, as a string or a
+ # reference
+ _, field_type = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE),
+ )
+
+ if isinstance(field_type, JavaClass):
+ # FIXME: ugly trick
+ # (a reference can resolve to a class description: use its name)
+ field_type = JavaString(field_type.name)
+
+ if type(field_type) is not JavaString: # pylint:disable=C0123
+ raise AssertionError(
+ "Field type must be a JavaString, "
+ "not {0}".format(type(field_type))
+ )
+ else:
+ # Convert the TypeCode to its char value
+ field_type = JavaString(str(chr(base_field_type.value)))
+
+ log_debug(
+ "< FieldName: 0x{0:X} Name:{1} Type:{2} ID:{3}".format(
+ typecode, field_name, field_type, fieldId
+ ),
+ ident,
+ )
+ assert field_name is not None
+ assert field_type is not None
+
+ clazz.fields_names.append(field_name)
+ clazz.fields_types.append(field_type)
+
+ if parent:
+ # NOTE(review): as this code is in a class body, Python mangles these
+ # double-underscore attribute names with the name of this class
+ parent.__fields = clazz.fields_names # pylint:disable=W0212
+ parent.__types = clazz.fields_types # pylint:disable=W0212
+
+ # classAnnotation
+ # (only the empty one is supported: the next byte must be the end marker)
+ (opid,) = self._readStruct(">B")
+ log_debug(
+ "OpCode: 0x{0:X} -- {1} (classAnnotation)".format(
+ opid, StreamCodeDebug.op_id(opid)
+ ),
+ ident,
+ )
+ if opid != TerminalCode.TC_ENDBLOCKDATA:
+ raise NotImplementedError("classAnnotation isn't implemented yet")
+
+ # superClassDesc
+ # (None when the stream gives TC_NULL, as handled by do_null)
+ log_debug("Reading Super Class of {0}".format(clazz.name), ident)
+ _, superclassdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+ log_debug(
+ "Super Class for {0}: {1}".format(clazz.name, str(superclassdesc)),
+ ident,
+ )
+ clazz.superclass = superclassdesc
+ return clazz
+
+ def do_blockdata(self, parent=None, ident=0):
+     """
+     Handles TC_BLOCKDATA opcode: the size of the block is read from a
+     single unsigned byte
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: A string containing the block data
+     """
+     # TC_BLOCKDATA (unsigned byte) (byte)[size]
+     log_debug("[blockdata]", ident)
+     size = self._readStruct(">B")[0]
+     raw_block = self.object_stream.read(size)
+
+     # Ensure we have an str
+     return read_to_str(raw_block)
+
+ def do_blockdata_long(self, parent=None, ident=0):
+     """
+     Handles TC_BLOCKDATALONG opcode: the size of the block is read from
+     an unsigned 4-byte integer
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: A string containing the block data
+     """
+     # TC_BLOCKDATALONG (int) (byte)[size]
+     log_debug("[blockdatalong]", ident)
+     size = self._readStruct(">I")[0]
+     raw_block = self.object_stream.read(size)
+
+     # Ensure we have an str
+     return read_to_str(raw_block)
+
+ def do_class(self, parent=None, ident=0):
+     """
+     Handles TC_CLASS opcode
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: The result of the handler of the class description opcode
+              that follows (a JavaClass object for a TC_CLASSDESC)
+     """
+     # TC_CLASS classDesc newHandle
+     log_debug("[class]", ident)
+
+     # TODO: what to do with "(ClassDesc)prevObject".
+     # (see 3rd line for classDesc:)
+     accepted_opcodes = (
+         TerminalCode.TC_CLASSDESC,
+         TerminalCode.TC_PROXYCLASSDESC,
+         TerminalCode.TC_NULL,
+         TerminalCode.TC_REFERENCE,
+     )
+     classdesc = self._read_and_exec_opcode(
+         ident=ident + 1, expect=accepted_opcodes
+     )[1]
+     log_debug("Classdesc: {0}".format(classdesc), ident)
+
+     # The class itself gets a handle, after its description
+     self._add_reference(classdesc, ident)
+     return classdesc
+
+ def do_object(self, parent=None, ident=0):
+ """
+ Handles a TC_OBJECT opcode
+
+ Reads the class description of the object, lets the registered
+ transformers create its Python form, then reads the values of its
+ fields and its annotations.
+
+ :param parent: Not used by this handler
+ :param ident: Log indentation level
+ :return: The deserialized object: a JavaObject, or the object created
+ by a transformer
+ :raise NotImplementedError: The class is externalizable without block
+ data
+ """
+ # TC_OBJECT classDesc newHandle classdata[] // data for each class
+ java_object = JavaObject()
+ log_debug("[object]", ident)
+ log_debug(
+ "java_object.annotations just after instantiation: {0}".format(
+ java_object.annotations
+ ),
+ ident,
+ )
+
+ # TODO: what to do with "(ClassDesc)prevObject".
+ # (see 3rd line for classDesc:)
+ opcode, classdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_PROXYCLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+ # self.TC_REFERENCE hasn't shown in spec, but actually is here
+
+ # Create object
+ # The first transformer giving a result that is not None wins.
+ # NOTE(review): the loop variable overwrites the default JavaObject, so
+ # if every transformer returns None, java_object is None afterwards and
+ # the assignment of classdesc below fails - confirm that transformers
+ # always return an object.
+ for transformer in self.object_transformers:
+ java_object = transformer.create(classdesc, self)
+ if java_object is not None:
+ break
+
+ # Store classdesc of this object
+ java_object.classdesc = classdesc
+
+ # Store the reference
+ self._add_reference(java_object, ident)
+
+ # classdata[]
+
+ if (
+ classdesc.flags & ClassDescFlags.SC_EXTERNALIZABLE
+ and not classdesc.flags & ClassDescFlags.SC_BLOCK_DATA
+ ):
+ # TODO:
+ raise NotImplementedError("externalContents isn't implemented yet")
+
+ if classdesc.flags & ClassDescFlags.SC_SERIALIZABLE:
+ # TODO: look at ObjectInputStream.readSerialData()
+ # FIXME: Handle the SC_WRITE_METHOD flag
+
+ # create megalist
+ # (names and types of the fields of the whole class hierarchy: each
+ # class met while walking up puts its fields before those found so far)
+ tempclass = classdesc
+ megalist = []
+ megatypes = []
+ log_debug("Constructing class...", ident)
+ while tempclass:
+ log_debug("Class: {0}".format(tempclass.name), ident + 1)
+ class_fields_str = " - ".join(
+ " ".join((str(field_type), field_name))
+ for field_type, field_name in zip(
+ tempclass.fields_types, tempclass.fields_names
+ )
+ )
+ if class_fields_str:
+ log_debug(class_fields_str, ident + 2)
+
+ fieldscopy = tempclass.fields_names[:]
+ fieldscopy.extend(megalist)
+ megalist = fieldscopy
+
+ fieldscopy = tempclass.fields_types[:]
+ fieldscopy.extend(megatypes)
+ megatypes = fieldscopy
+
+ tempclass = tempclass.superclass
+
+ log_debug("Values count: {0}".format(len(megalist)), ident)
+ log_debug("Prepared list of values: {0}".format(megalist), ident)
+ log_debug("Prepared list of types: {0}".format(megatypes), ident)
+
+ # Each read value becomes an attribute of the object, named after its
+ # field
+ for field_name, field_type in zip(megalist, megatypes):
+ log_debug(
+ "Reading field: {0} - {1}".format(field_type, field_name)
+ )
+ res = self._read_value(field_type, ident, name=field_name)
+ java_object.__setattr__(field_name, res)
+
+ # "and" binds tighter than "or": annotations are read when the class is
+ # serializable with a write method, or externalizable with block data, or
+ # when its direct super class is serializable with a write method
+ if (
+ classdesc.flags & ClassDescFlags.SC_SERIALIZABLE
+ and classdesc.flags & ClassDescFlags.SC_WRITE_METHOD
+ or classdesc.flags & ClassDescFlags.SC_EXTERNALIZABLE
+ and classdesc.flags & ClassDescFlags.SC_BLOCK_DATA
+ or classdesc.superclass is not None
+ and classdesc.superclass.flags & ClassDescFlags.SC_SERIALIZABLE
+ and classdesc.superclass.flags & ClassDescFlags.SC_WRITE_METHOD
+ ):
+ # objectAnnotation
+ log_debug(
+ "java_object.annotations before: {0}".format(
+ java_object.annotations
+ ),
+ ident,
+ )
+
+ # Read content until the end marker; the marker itself is not stored.
+ # "opcode" starts as the opcode of the class description read above.
+ while opcode != TerminalCode.TC_ENDBLOCKDATA:
+ opcode, obj = self._read_and_exec_opcode(ident=ident + 1)
+ # , expect=[self.TC_ENDBLOCKDATA, self.TC_BLOCKDATA,
+ # self.TC_OBJECT, self.TC_NULL, self.TC_REFERENCE])
+ if opcode != TerminalCode.TC_ENDBLOCKDATA:
+ java_object.annotations.append(obj)
+
+ log_debug("objectAnnotation value: {0}".format(obj), ident)
+
+ log_debug(
+ "java_object.annotations after: {0}".format(
+ java_object.annotations
+ ),
+ ident,
+ )
+
+ # Allow extra loading operations
+ # (optional hook of the created object, called with this unmarshaller)
+ if hasattr(java_object, "__extra_loading__"):
+ log_debug("Java object has extra loading capability.")
+ java_object.__extra_loading__(self, ident)
+
+ log_debug(">>> java_object: {0}".format(java_object), ident)
+ return java_object
+
+ def do_string(self, parent=None, ident=0):
+     """
+     Handles a TC_STRING opcode: reads a string with the default length
+     format and stores it as a reference
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: A string
+     """
+     log_debug("[string]", ident)
+     java_string = JavaString(self._readString())
+     self._add_reference(java_string, ident)
+     return java_string
+
+ def do_string_long(self, parent=None, ident=0):
+     """
+     Handles a TC_LONGSTRING opcode: reads a string whose length uses the
+     ``Q`` structure format and stores it as a reference
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: A string
+     """
+     log_debug("[long string]", ident)
+     java_string = JavaString(self._readString("Q"))
+     self._add_reference(java_string, ident)
+     return java_string
+
+ def do_array(self, parent=None, ident=0):
+     """
+     Handles a TC_ARRAY opcode
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: A JavaArray of deserialized values, a JavaByteArray for an
+              array of bytes, or a numpy array for the other primitive
+              types when numpy support is enabled and available
+     :raise RuntimeError: End of stream reached while reading the content
+                          of a numpy array
+     """
+     # TC_ARRAY classDesc newHandle (int) values[size]
+     log_debug("[array]", ident)
+     _, classdesc = self._read_and_exec_opcode(
+         ident=ident + 1,
+         expect=(
+             TerminalCode.TC_CLASSDESC,
+             TerminalCode.TC_PROXYCLASSDESC,
+             TerminalCode.TC_NULL,
+             TerminalCode.TC_REFERENCE,
+         ),
+     )
+
+     array = JavaArray(classdesc)
+
+     # The handle is given to the JavaArray, before its content is read
+     self._add_reference(array, ident)
+
+     (size,) = self._readStruct(">i")
+     log_debug("size: {0}".format(size), ident)
+
+     # The class name starts with the array type code, followed by the
+     # type code of the elements
+     array_type_code = TypeCode(ord(classdesc.name[0]))
+     assert array_type_code == TypeCode.TYPE_ARRAY
+     type_code = TypeCode(ord(classdesc.name[1]))
+
+     if type_code in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+         for _ in range(size):
+             _, res = self._read_and_exec_opcode(ident=ident + 1)
+             log_debug("Object value: {0}".format(res), ident)
+             array.append(res)
+     elif type_code == TypeCode.TYPE_BYTE:
+         array = JavaByteArray(self.object_stream.read(size), classdesc)
+     elif self.use_numpy_arrays and numpy is not None:
+         # numpy.fromfile() works on the underlying OS file, so it fails on
+         # in-memory streams (e.g. io.BytesIO): read the bytes through the
+         # stream API and let numpy decode them instead
+         dtype = numpy.dtype(NUMPY_TYPE_MAP[type_code])
+         expected_size = size * dtype.itemsize
+         raw = self.object_stream.read(expected_size)
+         if len(raw) != expected_size:
+             raise RuntimeError(
+                 "Stream has been ended unexpectedly while unmarshaling."
+             )
+
+         # frombuffer() gives a read-only view of the bytes: copy it to
+         # return a writable array, as fromfile() did
+         array = numpy.frombuffer(raw, dtype=dtype).copy()
+     else:
+         for _ in range(size):
+             res = self._read_value(type_code, ident)
+             log_debug("Native value: {0}".format(repr(res)), ident)
+             array.append(res)
+
+     return array
+
+ def do_reference(self, parent=None, ident=0):
+     """
+     Handles a TC_REFERENCE opcode
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: The referenced object
+     :raise IndexError: The handle doesn't match any stored reference
+     """
+     (handle,) = self._readStruct(">L")
+     log_debug("## Reference handle: 0x{0:X}".format(handle), ident)
+
+     index = handle - StreamConstants.BASE_REFERENCE_IDX
+     if index < 0 or index >= len(self.references):
+         # A handle below the base index gives a negative index, which a
+         # list would silently resolve from its end: reject it like any
+         # other out-of-range handle
+         raise IndexError(
+             "Invalid reference handle: 0x{0:X}".format(handle)
+         )
+
+     ref = self.references[index]
+     log_debug("###-> Type: {0} - Value: {1}".format(type(ref), ref), ident)
+     return ref
+
+ @staticmethod
+ def do_null(parent=None, ident=0):
+ """
+ Handles a TC_NULL opcode
+
+ The opcode has no content: nothing is read from the stream. This
+ handler is also the one registered for TC_ENDBLOCKDATA in the opcode
+ map.
+
+ :param parent: Not used by this handler
+ :param ident: Log indentation level (not used by this handler)
+ :return: Always None
+ """
+ return None
+
+ def do_enum(self, parent=None, ident=0):
+     """
+     Handles a TC_ENUM opcode: reads the class description of the enum,
+     then the name of its constant
+
+     :param parent: Not used by this handler
+     :param ident: Log indentation level
+     :return: A JavaEnum object
+     """
+     # TC_ENUM classDesc newHandle enumConstantName
+     enum = JavaEnum()
+
+     classdesc_opcodes = (
+         TerminalCode.TC_CLASSDESC,
+         TerminalCode.TC_PROXYCLASSDESC,
+         TerminalCode.TC_NULL,
+         TerminalCode.TC_REFERENCE,
+     )
+     enum.classdesc = self._read_and_exec_opcode(
+         ident=ident + 1, expect=classdesc_opcodes
+     )[1]
+
+     # The enum gets its handle before its constant name is read
+     self._add_reference(enum, ident)
+
+     constant_opcodes = (TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE)
+     enum.constant = self._read_and_exec_opcode(
+         ident=ident + 1, expect=constant_opcodes
+     )[1]
+     return enum
+
+ def _read_value(self, raw_field_type, ident, name=""):
+     # type: (Union[bytes, int, TypeCode], int, str) -> Any
+     """
+     Reads the next value from the stream, according to the given type
+
+     :param raw_field_type: A serialization typecode: a TypeCode, its
+                            integer value, or a sequence starting with it
+     :param ident: Log indentation
+     :param name: Field name (for logs)
+     :return: The read value
+     :raise RuntimeError: Unknown field type
+     """
+     # Normalize the given type to a TypeCode
+     if isinstance(raw_field_type, TypeCode):
+         field_type = raw_field_type
+     elif isinstance(raw_field_type, int):
+         field_type = TypeCode(raw_field_type)
+     else:
+         # We don't need details for arrays and objects
+         raw_code = raw_field_type[0]
+         field_type = TypeCode(
+             raw_code if isinstance(raw_code, int) else ord(raw_code)
+         )
+
+     # Types whose value is returned exactly as unpacked
+     plain_formats = {
+         TypeCode.TYPE_BYTE: ">b",
+         TypeCode.TYPE_SHORT: ">h",
+         TypeCode.TYPE_INTEGER: ">i",
+         TypeCode.TYPE_LONG: ">q",
+         TypeCode.TYPE_FLOAT: ">f",
+         TypeCode.TYPE_DOUBLE: ">d",
+     }
+
+     if field_type in plain_formats:
+         (res,) = self._readStruct(plain_formats[field_type])  # type: Any
+     elif field_type == TypeCode.TYPE_BOOLEAN:
+         res = bool(self._readStruct(">B")[0])
+     elif field_type == TypeCode.TYPE_CHAR:
+         # TYPE_CHAR is defined by the serialization specification
+         # but not used in the implementation, so this is
+         # a hypothetical code
+         res = unicode_char(self._readStruct(">H")[0])
+     elif field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+         # Objects and arrays are introduced by their own opcode
+         _, res = self._read_and_exec_opcode(ident=ident + 1)
+     else:
+         raise RuntimeError("Unknown typecode: {0}".format(field_type))
+
+     log_debug(
+         "* {0} {1}: {2}".format(chr(field_type.value), name, repr(res)),
+         ident,
+     )
+     return res
+
+ @staticmethod
+ def _convert_char_to_type(type_char):
+     # type: (Any) -> TypeCode
+     """
+     Converts a read type character to the matching TypeCode
+
+     :param type_char: Read typecode, as an integer or as a character
+                       (converted with ord)
+     :return: The matching TypeCode member
+     :raise RuntimeError: Unknown typecode
+     """
+     typecode = type_char if isinstance(type_char, int) else ord(type_char)
+
+     try:
+         return TypeCode(typecode)
+     except ValueError:
+         message = "Typecode {0} ({1}) isn't supported.".format(
+             type_char, typecode
+         )
+         raise RuntimeError(message)
+
+ def _add_reference(self, obj, ident=0):
+     """
+     Stores an object that can later be the target of a reference
+
+     The handle of the object is its position in the storage, shifted by
+     the base reference index.
+
+     :param obj: Reference to add
+     :param ident: Log indentation level
+     """
+     new_handle = len(self.references) + StreamConstants.BASE_REFERENCE_IDX
+     description = "## New reference handle 0x{0:X}: {1} -> {2}".format(
+         new_handle, type(obj).__name__, repr(obj)
+     )
+     log_debug(description, ident)
+     self.references.append(obj)
+
+ def _oops_dump_state(self, ignore_remaining_data=False):
+     """
+     Log a deserialization error: the stored references and the content
+     of the stream around the current position
+
+     :param ignore_remaining_data: If True, don't log an error when
+                                   unused trailing bytes are remaining
+     """
+     log_error("==Oops state dump" + "=" * (30 - 17))
+     log_error("References: {0}".format(self.references))
+     log_error(
+         "Stream seeking back at -16 byte "
+         "(2nd line is an actual position!):"
+     )
+
+     # This method is called while an exception is being handled: seeking
+     # before the start of the stream raises on real files, which would
+     # replace the error being reported. Go back 16 bytes at most.
+     rewind = min(16, self.object_stream.tell())
+
+     # Do not use a keyword argument
+     self.object_stream.seek(-rewind, os.SEEK_CUR)
+     position = self.object_stream.tell()
+     the_rest = self.object_stream.read()
+
+     if not ignore_remaining_data and len(the_rest) != 0:
+         log_error(
+             "Warning!!!!: Stream still has {0} bytes left:\n{1}".format(
+                 len(the_rest), hexdump(the_rest, position)
+             )
+         )
+
+     log_error("=" * 30)
diff --git a/javaobj/v2/__init__.py b/javaobj/v2/__init__.py
new file mode 100644
index 0000000..e9745ea
--- /dev/null
+++ b/javaobj/v2/__init__.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""
+Rewritten version of the un-marshalling process of javaobj.
+
+The previous process had issues in some cases.
+
+This package is based on the approach of the jdeserialize project (in Java)
+See: https://github.com/frohoff/jdeserialize
+
+The object transformer concept of javaobj has been adapted to work with this
+approach.
+
+This package should handle more files than before, in read-only mode.
+The writing mode should be handled by the "classic" javaobj code.
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from . import api, beans, core, main, stream, transformers # noqa: 401
+from .main import load, loads # noqa: 401
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
diff --git a/javaobj/v2/api.py b/javaobj/v2/api.py
new file mode 100644
index 0000000..8d9cd0d
--- /dev/null
+++ b/javaobj/v2/api.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Definition of the object transformer API
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+from typing import List, Optional
+
+from ..constants import TypeCode # pylint:disable=W0611
+from .beans import ( # pylint:disable=W0611
+ JavaClassDesc,
+ JavaInstance,
+ ParsedJavaContent,
+)
+from .stream import DataStreamReader # pylint:disable=W0611
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class IJavaStreamParser:
+    """
+    API of the Java stream parser
+
+    Every method of this class raises NotImplementedError: they must be
+    overridden by the parser implementation.
+    """
+
+    def run(self):
+        # type: () -> List[ParsedJavaContent]
+        """
+        Parses the input stream
+
+        :return: The parsed content
+        :raise NotImplementedError: Method not overridden
+        """
+        raise NotImplementedError
+
+    def dump(self, content):
+        # type: (List[ParsedJavaContent]) -> str
+        """
+        Dumps to a string the given objects
+
+        :param content: The objects to dump
+        :return: The string form of the objects
+        :raise NotImplementedError: Method not overridden
+        """
+        raise NotImplementedError
+
+    def _read_content(self, type_code, block_data, class_desc=None):
+        # type: (int, bool, Optional[JavaClassDesc]) -> ParsedJavaContent
+        """
+        Parses the next content. Use with care (use only in a transformer)
+
+        :param type_code: Type code of the content to parse
+        :param block_data: Flag given to the parser (see the implementation)
+        :param class_desc: An optional class description
+        :return: The parsed content
+        :raise NotImplementedError: Method not overridden
+        """
+        # Fail like run() and dump() instead of silently returning None,
+        # which is not a valid ParsedJavaContent
+        raise NotImplementedError
+
+
+class ObjectTransformer(object): # pylint:disable=R0205
+ """
+ Representation of an object transformer
+
+ Every method of this base class returns None, i.e. "not handled":
+ a transformer only overrides the methods for what it supports.
+ """
+
+ def create_instance(self, classdesc): # pylint:disable=W0613,R0201
+ # type: (JavaClassDesc) -> Optional[JavaInstance]
+ """
+ Transforms a parsed Java object into a Python object.
+
+ The result must be a JavaInstance bean, or None if the transformer
+ doesn't support this kind of instance.
+
+ :param classdesc: The description of a Java class
+ :return: A JavaInstance bean, or None if the class is not supported
+ (always None in this base class)
+ """
+ return None
+
+ def load_array(
+ self, reader, type_code, size
+ ): # pylint:disable=W0613,R0201
+ # type: (DataStreamReader, TypeCode, int) -> Optional[list]
+ """
+ Loads and returns the content of a Java array, if possible.
+
+ The result of this method must be the content of the array, i.e. a list
+ or an array. It will be stored in a JavaArray bean created by the
+ parser.
+
+ This method must return None if it can't handle the array.
+
+ :param reader: The data stream reader
+ :param type_code: Type of the elements of the array
+ :param size: Number of elements in the array
+ :return: The content of the array, or None if not handled
+ (always None in this base class)
+ """
+ return None
+
+ def load_custom_writeObject(
+ self, parser, reader, name
+ ): # pylint:disable=W0613,R0201
+ # type: (IJavaStreamParser, DataStreamReader, str) -> Optional[JavaClassDesc]
+ """
+ Reads content stored from a custom writeObject.
+
+ This method is called only if the class description has both the
+ ``SC_SERIALIZABLE`` and ``SC_WRITE_METHOD`` flags set.
+
+ The stream parsing will stop and fail if this method returns None.
+
+ :param parser: The JavaStreamParser in use
+ :param reader: The data stream reader
+ :param name: The class description name
+ :return: A Java class description, if handled, else None
+ (always None in this base class)
+ """
+ return None
diff --git a/javaobj/v2/beans.py b/javaobj/v2/beans.py
new file mode 100644
index 0000000..0b81f16
--- /dev/null
+++ b/javaobj/v2/beans.py
@@ -0,0 +1,641 @@
+#!/usr/bin/env python3
+"""
+Definition of the beans used to represent the parsed objects
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+import logging
+from enum import IntEnum
+from typing import Any, Dict, List, Optional, Set
+
+from ..constants import ClassDescFlags, TypeCode
+from ..modifiedutf8 import byte_to_int, decode_modified_utf8
+from ..utils import UNICODE_TYPE
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class ContentType(IntEnum):
+ """
+ Types of objects
+ """
+
+ INSTANCE = 0
+ CLASS = 1
+ ARRAY = 2
+ STRING = 3
+ ENUM = 4
+ CLASSDESC = 5
+ BLOCKDATA = 6
+ EXCEPTIONSTATE = 7
+
+
+class ClassDataType(IntEnum):
+ """
+ Class data types
+ """
+
+ NOWRCLASS = 0
+ WRCLASS = 1
+ EXTERNAL_CONTENTS = 2
+ OBJECT_ANNOTATION = 3
+
+
+class ClassDescType(IntEnum):
+ """
+ Types of class descriptions
+ """
+
+ NORMALCLASS = 0
+ PROXYCLASS = 1
+
+
+class FieldType(IntEnum):
+ """
+ Types of class fields
+ """
+
+ BYTE = TypeCode.TYPE_BYTE.value
+ CHAR = TypeCode.TYPE_CHAR.value
+ DOUBLE = TypeCode.TYPE_DOUBLE.value
+ FLOAT = TypeCode.TYPE_FLOAT.value
+ INTEGER = TypeCode.TYPE_INTEGER.value
+ LONG = TypeCode.TYPE_LONG.value
+ SHORT = TypeCode.TYPE_SHORT.value
+ BOOLEAN = TypeCode.TYPE_BOOLEAN.value
+ ARRAY = TypeCode.TYPE_ARRAY.value
+ OBJECT = TypeCode.TYPE_OBJECT.value
+
+ def type_code(self):
+ # type: () -> TypeCode
+ """
+ Converts this FieldType to its matching TypeCode
+ """
+ return TypeCode(self.value)
+
+
+class ParsedJavaContent(object): # pylint:disable=R0205
+ """
+ Generic representation of data parsed from the stream
+ """
+
+ def __init__(self, content_type):
+ # type: (ContentType) -> None
+ self.type = content_type # type: ContentType
+ self.is_exception = False # type: bool
+ self.handle = 0 # type: int
+
+ def __str__(self):
+ return "[ParseJavaObject 0x{0:x} - {1}]".format(self.handle, self.type)
+
+ __repr__ = __str__
+
+ def dump(self, indent=0):
+ # type: (int) -> str
+ """
+ Base dump: the string form of this object, prefixed by ``indent`` tabs
+ """
+ return "\t" * indent + str(self)
+
+ def validate(self):
+ """
+ Validity check on the object (nothing to check in this base class)
+ """
+ pass
+
+
+class ExceptionState(ParsedJavaContent):
+ """
+ Representation of a failed parsing
+ """
+
+ def __init__(self, exception_object, data):
+ # type: (ParsedJavaContent, bytes) -> None
+ super(ExceptionState, self).__init__(ContentType.EXCEPTIONSTATE)
+ self.exception_object = exception_object
+ self.stream_data = data
+ self.handle = exception_object.handle
+
+ def dump(self, indent=0):
+ # type: (int) -> str
+ """
+ Returns a dump representation of the exception
+ """
+ return "\t" * indent + "[ExceptionState {0:x}]".format(self.handle)
+
+
+class ExceptionRead(Exception):
+    """Signals that an exception object has been parsed from the stream"""
+
+    def __init__(self, content):
+        # type: (ParsedJavaContent) -> None
+        # Hand the content to Exception so args, str() and pickling work
+        super(ExceptionRead, self).__init__(content)
+        self.exception_object = content
+
+
+class JavaString(ParsedJavaContent):
+ """
+ Represents a Java string
+ """
+
+ def __init__(self, handle, data):
+ # type: (int, bytes) -> None
+ super(JavaString, self).__init__(ContentType.STRING)
+ self.handle = handle
+ value, length = decode_modified_utf8(data)
+ self.value = value # type: str
+ self.length = length # type: int
+
+ def __repr__(self):
+ return repr(self.value)
+
+ def __str__(self):
+ return self.value
+
+ def dump(self, indent=0):
+ # type: (int) -> str
+ """
+ Returns a dump representation of the string
+ """
+ return "\t" * indent + "[String {0:x}: {1}]".format(
+ self.handle, repr(self.value)
+ )
+
+ def __hash__(self):
+ return hash(self.value)
+
+ def __eq__(self, other):
+ return self.value == other
+
+
+class JavaField:
+ """
+ Represents a field in a Java class description
+ """
+
+ def __init__(self, field_type, name, class_name=None):
+ # type: (FieldType, str, Optional[JavaString]) -> None
+ self.type = field_type
+ self.name = name
+ self.class_name = class_name
+ self.is_inner_class_reference = False
+
+ if self.class_name:
+ self.validate(self.class_name.value)
+
+ def validate(self, java_type):
+ # type: (str) -> None
+ """
+ Validates the type given as parameter
+ """
+ if self.type == FieldType.OBJECT:
+ if not java_type:
+ raise ValueError("Class name can't be empty")
+
+ if java_type[0] != "L" or java_type[-1] != ";":
+ raise ValueError(
+ "Invalid object field type: {0}".format(java_type)
+ )
+
+
+class JavaClassDesc(ParsedJavaContent):
+ """
+ Represents the description of a class
+ """
+
+ def __init__(self, class_desc_type):
+ # type: (ClassDescType) -> None
+ super(JavaClassDesc, self).__init__(ContentType.CLASSDESC)
+
+ # Type of class description
+ self.class_type = class_desc_type # type: ClassDescType
+
+ # Class name
+ self.name = None # type: Optional[str]
+
+ # Serial version UID
+ self.serial_version_uid = 0 # type: int
+
+ # Description flags byte
+ self.desc_flags = 0 # type: int
+
+ # Fields in the class
+ self.fields = [] # type: List[JavaField]
+
+ # Inner classes
+ self.inner_classes = [] # type: List[JavaClassDesc]
+
+ # List of annotations objects
+ self.annotations = [] # type: List[ParsedJavaContent]
+
+ # The super class of this one, if any
+ self.super_class = None # type: Optional[JavaClassDesc]
+
+ # Indicates if it is a super class
+ self.is_super_class = False
+
+ # List of the interfaces of the class
+ self.interfaces = [] # type: List[str]
+
+ # Set of enum constants
+ self.enum_constants = set() # type: Set[str]
+
+ # Flag to indicate if this is an inner class
+ self.is_inner_class = False # type: bool
+
+ # Flag to indicate if this is a local inner class
+ self.is_local_inner_class = False # type: bool
+
+ # Flag to indicate if this is a static member class
+ self.is_static_member_class = False # type: bool
+
+ def __str__(self):
+ return "[classdesc 0x{0:x}: name {1}, uid {2}]".format(
+ self.handle, self.name, self.serial_version_uid
+ )
+
+ __repr__ = __str__
+
+ def dump(self, indent=0):
+ # type: (int) -> str
+ """
+ Returns a dump representation of the class description
+ """
+ return "\t" * indent + "[classdesc 0x{0:x}: name {1}, uid {2}]".format(
+ self.handle, self.name, self.serial_version_uid
+ )
+
+ @property
+ def serialVersionUID(self): # pylint:disable=C0103
+ """
+ Mimics the javaobj API
+ """
+ return self.serial_version_uid
+
+ @property
+ def flags(self):
+ """
+ Mimics the javaobj API
+ """
+ return self.desc_flags
+
+ @property
+ def fields_names(self):
+ """
+ Mimics the javaobj API
+ """
+ return [field.name for field in self.fields]
+
+ @property
+ def fields_types(self):
+ """
+ Mimics the javaobj API
+ """
+ return [field.type for field in self.fields]
+
+ @property
+ def data_type(self):
+ """
+ Computes the class data type from the flags (ValueError if unhandled)
+ """
+ if ClassDescFlags.SC_SERIALIZABLE & self.desc_flags:
+ return (
+ ClassDataType.WRCLASS
+ if (ClassDescFlags.SC_WRITE_METHOD & self.desc_flags)
+ else ClassDataType.NOWRCLASS
+ )
+
+ if ClassDescFlags.SC_EXTERNALIZABLE & self.desc_flags:
+ return (
+ ClassDataType.OBJECT_ANNOTATION
+ if (ClassDescFlags.SC_WRITE_METHOD & self.desc_flags)
+ else ClassDataType.EXTERNAL_CONTENTS
+ )
+
+ raise ValueError("Unhandled Class Data Type")
+
+ def is_array_class(self):
+ # type: () -> bool
+ """
+ Determines if this is an array type
+ """
+ return self.name.startswith("[") if self.name else False
+
+ def get_hierarchy(self, classes):
+ # type: (List["JavaClassDesc"]) -> None
+ """
+ Generates a list of class descriptions in this class's hierarchy, in
+ the order described by the Object Stream Serialization Protocol.
+ This is the order in which fields are read from the stream.
+
+ :param classes: A list to be filled in with the hierarchy
+ """
+ if self.super_class is not None:
+ if self.super_class.class_type == ClassDescType.PROXYCLASS:
+ logging.warning("Hit a proxy class in super class hierarchy")
+ else:
+ self.super_class.get_hierarchy(classes)
+
+ classes.append(self)
+
+ def validate(self):
+ """
+ Checks the validity of this class description
+ """
+ serial_or_extern = (
+ ClassDescFlags.SC_SERIALIZABLE | ClassDescFlags.SC_EXTERNALIZABLE
+ )
+ if (self.desc_flags & serial_or_extern) == 0 and self.fields:
+ raise ValueError(
+ "Non-serializable, non-externalizable class has fields"
+ )
+
+ if self.desc_flags & serial_or_extern == serial_or_extern:
+ raise ValueError("Class is both serializable and externalizable")
+
+ if self.desc_flags & ClassDescFlags.SC_ENUM:
+ if self.fields or self.interfaces:
+ raise ValueError(
+ "Enums shouldn't implement interfaces "
+ "or have non-constant fields"
+ )
+ else:
+ if self.enum_constants:
+ raise ValueError(
+ "Non-enum classes shouldn't have enum constants"
+ )
+
+
+class JavaInstance(ParsedJavaContent):
+    """
+    Represents an instance of Java object
+    """
+
+    def __init__(self):
+        super(JavaInstance, self).__init__(ContentType.INSTANCE)
+        self.classdesc = None  # type: JavaClassDesc
+        self.field_data = {}  # type: Dict[JavaClassDesc, Dict[JavaField, Any]]
+        self.annotations = (
+            {}
+        )  # type: Dict[JavaClassDesc, List[ParsedJavaContent]]
+        self.is_external_instance = False
+
+    def __str__(self):
+        return "[instance 0x{0:x}: type {1}]".format(
+            self.handle, self.classdesc.name
+        )
+
+    __repr__ = __str__
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Returns a dump representation of the instance
+        """
+        prefix = "\t" * indent
+        sub_prefix = "\t" * (indent + 1)
+
+        dump = [
+            prefix
+            + "[instance 0x{0:x}: {1:x} / {2}]".format(
+                self.handle, self.classdesc.handle, self.classdesc.name
+            )
+        ]
+
+        for cd, annotations in self.annotations.items():
+            dump.append(
+                "{0}{1} -- {2} annotations".format(
+                    prefix, cd.name, len(annotations)
+                )
+            )
+            for ann in annotations:
+                dump.append(sub_prefix + repr(ann))
+
+        for cd, fields in self.field_data.items():
+            dump.append(
+                "{0}{1} -- {2} fields".format(prefix, cd.name, len(fields))
+            )
+            for field, value in fields.items():
+                if isinstance(value, ParsedJavaContent):
+                    if self.handle != 0 and value.handle == self.handle:
+                        value_str = "this"
+                    else:
+                        value_str = "\n" + value.dump(indent + 2)
+                else:
+                    value_str = repr(value)
+
+                dump.append(
+                    "{0}{1} {2}: {3}".format(
+                        sub_prefix, field.type.name, field.name, value_str
+                    )
+                )
+
+        dump.append(prefix + "[/instance 0x{0:x}]".format(self.handle))
+        return "\n".join(dump)
+
+    def __getattr__(self, name):
+        """
+        Returns the value of the parsed Java field with the given name
+        """
+        # Use __dict__: self.field_data would recurse here when not set yet
+        for cd_fields in self.__dict__.get("field_data", {}).values():
+            for field, value in cd_fields.items():
+                if field.name == name:
+                    return value
+        raise AttributeError(name)
+
+    def get_class(self):
+        """
+        Returns the class of this instance
+        """
+        return self.classdesc
+
+    def load_from_blockdata(
+        self, parser, reader, indent=0
+    ):  # pylint:disable=W0613,R0201
+        """
+        Reads content stored in a block data.
+
+        This method is called only if the class description has both the
+        ``SC_EXTERNALIZABLE`` and ``SC_BLOCK_DATA`` flags set.
+
+        The stream parsing will stop and fail if this method returns False.
+
+        :param parser: The JavaStreamParser in use
+        :param reader: The underlying data stream reader
+        :param indent: Indentation to use in logs
+        :return: True on success, False on error
+        """
+        return False
+
+    def load_from_instance(self, indent=0):  # pylint:disable=W0613,R0201
+        # type: (int) -> bool
+        """
+        Updates the content of this instance from its parsed fields and
+        annotations
+
+        :param indent: Indentation to use in logs
+        :return: True on success, False on error (currently ignored)
+        """
+        return False
+
+
+class JavaClass(ParsedJavaContent):
+ """
+ Represents a stored Java class
+ """
+
+ def __init__(self, handle, class_desc):
+ # type: (int, JavaClassDesc) -> None
+ super(JavaClass, self).__init__(ContentType.CLASS)
+ self.handle = handle
+ self.classdesc = class_desc
+
+ def __str__(self):
+ return "[class 0x{0:x}: {1}]".format(self.handle, self.classdesc)
+
+ __repr__ = __str__
+
+ @property
+ def name(self):
+ """
+ Mimics the javaobj API
+ """
+ return self.classdesc.name
+
+
+class JavaEnum(ParsedJavaContent):
+ """
+ Represents an enumeration value
+ """
+
+ def __init__(self, handle, class_desc, value):
+ # type: (int, JavaClassDesc, JavaString) -> None
+ super(JavaEnum, self).__init__(ContentType.ENUM)
+ self.handle = handle
+ self.classdesc = class_desc
+ self.value = value
+
+ def __str__(self):
+ return "[Enum 0x{0:x}: {1}]".format(self.handle, self.value)
+
+ __repr__ = __str__
+
+ @property
+ def constant(self):
+ """
+ Mimics the javaobj API
+ """
+ return self.value
+
+
+class JavaArray(ParsedJavaContent, list):
+ """
+ Represents a Java array
+ """
+
+ def __init__(self, handle, class_desc, field_type, content):
+ # type: (int, JavaClassDesc, FieldType, List[Any]) -> None
+ list.__init__(self, content)
+ ParsedJavaContent.__init__(self, ContentType.ARRAY)
+ self.handle = handle
+ self.classdesc = class_desc
+ self.field_type = field_type
+ self.data = content
+
+ def __str__(self):
+ return "[{0}]".format(", ".join(repr(x) for x in self))
+
+ __repr__ = __str__
+
+ def dump(self, indent=0):
+ # type: (int) -> str
+ """
+ Returns a dump representation of the array
+ """
+ prefix = "\t" * indent
+ sub_prefix = "\t" * (indent + 1)
+ dump = [
+ "{0}[array 0x{1:x}: {2} items - stored as {3}]".format(
+ prefix, self.handle, len(self), type(self.data).__name__
+ )
+ ]
+ for x in self:
+ if isinstance(x, ParsedJavaContent):
+ if self.handle != 0 and x.handle == self.handle:
+ dump.append("this,")
+ else:
+ dump.append(x.dump(indent + 1) + ",")
+ else:
+ dump.append(sub_prefix + repr(x) + ",")
+ dump.append(prefix + "[/array 0x{0:x}]".format(self.handle))
+ return "\n".join(dump)
+
+ @property
+ def _data(self):
+ """
+ Mimics the javaobj API
+ """
+ return tuple(self)
+
+
+class BlockData(ParsedJavaContent):
+    """
+    Represents a data block (raw bytes kept as found in the stream)
+    """
+
+    def __init__(self, data):
+        # type: (bytes) -> None
+        super(BlockData, self).__init__(ContentType.BLOCKDATA)
+        self.data = data
+
+    def __str__(self):
+        return "[blockdata 0x{0:x}: {1} bytes]".format(
+            self.handle, len(self.data)
+        )
+
+    def __repr__(self):
+        return repr(self.data)
+
+    def __eq__(self, other):
+        if isinstance(other, BlockData):
+            return self.data == other.data  # block vs block: same payload
+        if isinstance(other, (str, UNICODE_TYPE)):
+            other_data = tuple(ord(x) for x in other)
+        elif isinstance(other, bytes):
+            other_data = tuple(byte_to_int(x) for x in other)
+        else:
+            return False  # Can't compare
+        return other_data == tuple(byte_to_int(x) for x in self.data)
diff --git a/javaobj/v2/core.py b/javaobj/v2/core.py
new file mode 100644
index 0000000..8e018a6
--- /dev/null
+++ b/javaobj/v2/core.py
@@ -0,0 +1,766 @@
+#!/usr/bin/env python3
+"""
+Second parsing approach for javaobj, using the same approach as jdeserialize
+See: https://github.com/frohoff/jdeserialize
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+import logging
+import os
+from typing import ( # pylint:disable=W0611
+ IO,
+ Any,
+ Callable,
+ Dict,
+ List,
+ Optional,
+)
+
+from ..constants import (
+ PRIMITIVE_TYPES,
+ StreamConstants,
+ TerminalCode,
+ TypeCode,
+)
+from ..modifiedutf8 import ( # pylint:disable=W0611 # noqa: F401
+ decode_modified_utf8,
+)
+from . import api # pylint:disable=W0611
+from .beans import (
+ BlockData,
+ ClassDataType,
+ ClassDescType,
+ ExceptionRead,
+ ExceptionState,
+ FieldType,
+ JavaArray,
+ JavaClass,
+ JavaClassDesc,
+ JavaEnum,
+ JavaField,
+ JavaInstance,
+ JavaString,
+ ParsedJavaContent,
+)
+from .stream import DataStreamReader
+from .transformers import DefaultObjectTransformer
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaStreamParser(api.IJavaStreamParser):
+ """
+ Parses a Java stream
+ """
+
+ def __init__(self, fd, transformers):
+ # type: (IO[bytes], List[api.ObjectTransformer]) -> None
+ """
+ :param fd: File-object to read from
+ :param transformers: Custom object transformers
+ """
+ # Input stream
+ self.__fd = fd
+ self.__reader = DataStreamReader(fd)
+
+ # Object transformers
+ self.__transformers = list(transformers)
+
+ # Logger
+ self._log = logging.getLogger("javaobj.parser")
+
+ # Handles
+ self.__handle_maps = [] # type: List[Dict[int, ParsedJavaContent]]
+ self.__handles = {} # type: Dict[int, ParsedJavaContent]
+
+ # Initial handle value
+ self.__current_handle = StreamConstants.BASE_REFERENCE_IDX.value
+
+ # Definition of the type code handlers
+ # Each takes the type code as argument
+ self.__type_code_handlers = {
+ TerminalCode.TC_OBJECT: self._do_object,
+ TerminalCode.TC_CLASS: self._do_class,
+ TerminalCode.TC_ARRAY: self._do_array,
+ TerminalCode.TC_STRING: self._read_new_string,
+ TerminalCode.TC_LONGSTRING: self._read_new_string,
+ TerminalCode.TC_ENUM: self._do_enum,
+ TerminalCode.TC_CLASSDESC: self._do_classdesc,
+ TerminalCode.TC_PROXYCLASSDESC: self._do_classdesc,
+ TerminalCode.TC_REFERENCE: self._do_reference,
+ TerminalCode.TC_NULL: self._do_null,
+ TerminalCode.TC_EXCEPTION: self._do_exception,
+ TerminalCode.TC_BLOCKDATA: self._do_block_data,
+ TerminalCode.TC_BLOCKDATALONG: self._do_block_data,
+ } # type: Dict[int, Callable[[int], ParsedJavaContent]]
+
+ def run(self):
+ # type: () -> List[ParsedJavaContent]
+ """
+ Parses the input stream
+ """
+ # Check the magic byte
+ magic = self.__reader.read_ushort()
+ if magic != StreamConstants.STREAM_MAGIC:
+ raise ValueError("Invalid file magic: 0x{0:x}".format(magic))
+
+ # Check the stream version
+ version = self.__reader.read_ushort()
+ if version != StreamConstants.STREAM_VERSION:
+ raise ValueError("Invalid file version: 0x{0:x}".format(version))
+
+ # Reset internal state
+ self._reset()
+
+ # Read content
+ contents = [] # type: List[ParsedJavaContent]
+ while True:
+ self._log.info("Reading next content")
+ start = self.__fd.tell()
+ try:
+ type_code = self.__reader.read_byte()
+ except EOFError:
+ # End of file
+ break
+
+ if type_code == TerminalCode.TC_RESET:
+ # Explicit reset
+ self._reset()
+ continue
+
+ parsed_content = self._read_content(type_code, True)
+ self._log.debug("Read: %s", parsed_content)
+ if parsed_content is not None and parsed_content.is_exception:
+ # Get the raw data between the start of the object and our
+ # current position
+ end = self.__fd.tell()
+ self.__fd.seek(start, os.SEEK_SET)
+ stream_data = self.__fd.read(end - start)
+
+ # Prepare an exception object
+ parsed_content = ExceptionState(parsed_content, stream_data)
+
+ contents.append(parsed_content)
+
+ for content in self.__handles.values():
+ content.validate()
+
+ # TODO: connect member classes ? (see jdeserialize @ 864)
+
+ if self.__handles:
+ self.__handle_maps.append(self.__handles.copy())
+
+ return contents
+
+ def dump(self, content):
+ # type: (List[ParsedJavaContent]) -> str
+ """
+ Dumps to a string the given objects
+ """
+ lines = [] # type: List[str]
+
+ # Stream content
+ lines.append("//// BEGIN stream content output")
+ lines.extend(str(c) for c in content)
+ lines.append("//// END stream content output")
+ lines.append("")
+
+ lines.append("//// BEGIN instance dump")
+ for c in self.__handles.values():
+ if isinstance(c, JavaInstance):
+ instance = c # type: JavaInstance
+ lines.extend(self._dump_instance(instance))
+ lines.append("//// END instance dump")
+ lines.append("")
+ return "\n".join(lines)
+
+    @staticmethod
+    def _dump_instance(instance):
+        # type: (JavaInstance) -> List[str]
+        """
+        Dumps an instance to a list of lines: its handle and class, then its
+        annotations and field values, both grouped by declaring class
+        """
+        lines = [
+            "[instance 0x{0:x}: 0x{1:x} / {2}".format(
+                instance.handle,
+                instance.classdesc.handle,
+                instance.classdesc.name,
+            )
+        ]  # type: List[str]
+
+        if instance.annotations:
+            lines.append("\tobject annotations:")
+            for cd, annotation in instance.annotations.items():
+                lines.append("\t" + (cd.name or "null"))
+                for c in annotation:
+                    lines.append("\t\t" + str(c))
+
+        if instance.field_data:
+            lines.append("\tfield data:")
+            # field_data is keyed by class description: walk each class, then
+            # the fields it declares (the keys are not the fields themselves)
+            for cd, fields in instance.field_data.items():
+                lines.append("\t" + (cd.name or "null"))
+                for field, obj in fields.items():
+                    line = "\t\t" + (field.name or "null") + ": "
+                    if isinstance(obj, ParsedJavaContent):
+                        if obj.handle == instance.handle:
+                            line += "this"
+                        else:
+                            line += "r0x{0:x}".format(obj.handle)
+                        line += ": " + str(obj)
+                    else:
+                        line += str(obj)
+                    lines.append(line)
+
+        lines.append("]")
+        return lines
+
+    def _reset(self):
+        """
+        Archives the current handles, then restarts with an empty handle map
+        """
+        if self.__handles:
+            self.__handle_maps.append(self.__handles.copy())
+
+        self.__handles.clear()
+
+        # Reset handle index, as a plain integer like in the constructor
+        self.__current_handle = StreamConstants.BASE_REFERENCE_IDX.value
+
+ def _new_handle(self):
+ # type: () -> int
+ """
+ Returns a new handle value
+ """
+ handle = self.__current_handle
+ self.__current_handle += 1
+ return handle
+
+ def _set_handle(self, handle, content):
+ # type: (int, ParsedJavaContent) -> None
+ """
+ Stores the reference to an object
+ """
+ if handle in self.__handles:
+ raise ValueError("Trying to reset handle {0:x}".format(handle))
+
+ self.__handles[handle] = content
+
+ @staticmethod
+ def _do_null(_):
+ """
+ The easiest one
+ """
+ return None
+
+ def _read_content(self, type_code, block_data, class_desc=None):
+ # type: (int, bool, Optional[JavaClassDesc]) -> ParsedJavaContent
+ """
+ Parses the next content
+ """
+ if not block_data and type_code in (
+ TerminalCode.TC_BLOCKDATA,
+ TerminalCode.TC_BLOCKDATALONG,
+ ):
+ raise ValueError("Got a block data, but not allowed here.")
+
+ try:
+ # Look for a handler for that type code
+ handler = self.__type_code_handlers[type_code]
+ except KeyError:
+ # Look for an external reader
+ if (
+ class_desc
+ and class_desc.name
+ and class_desc.data_type == ClassDataType.WRCLASS
+ ):
+ # Return its result immediately
+ return self._custom_readObject(class_desc.name)
+
+ # No valid custom reader: abandon
+ raise ValueError("Unknown type code: 0x{0:x}".format(type_code))
+ else:
+ try:
+ # Parse the object
+ return handler(type_code)
+ except ExceptionRead as ex:
+ # We found an exception object: return it (raise later)
+ return ex.exception_object
+
+    def _read_new_string(self, type_code):
+        # type: (int) -> JavaString
+        """
+        Reads a Java String (short or long form), or a reference to one
+        """
+        if type_code == TerminalCode.TC_REFERENCE:
+            previous = self._do_reference()
+            if not isinstance(previous, JavaString):
+                raise ValueError("Invalid reference to a Java string")
+            return previous
+
+        handle = self._new_handle()
+
+        # Read the length
+        if type_code == TerminalCode.TC_STRING:
+            length = self.__reader.read_ushort()
+        elif type_code == TerminalCode.TC_LONGSTRING:
+            length = self.__reader.read_long()
+            if length < 0 or length > 2147483647:
+                raise ValueError("Invalid string length: {0}".format(length))
+            if length < 65536:
+                self._log.warning("Small string stored as a long one")
+        else:
+            # Fail cleanly instead of hitting an unbound "length" below
+            raise ValueError("Invalid string type: 0x{0:x}".format(type_code))
+
+        # Parse the content
+        data = self.__fd.read(length)
+        java_str = JavaString(handle, data)
+
+        # Store the reference to the string
+        self._set_handle(handle, java_str)
+        return java_str
+
+ def _read_classdesc(self):
+ # type: () -> JavaClassDesc
+ """
+ Reads a class description with its type code
+ """
+ type_code = self.__reader.read_byte()
+ return self._do_classdesc(type_code)
+
+ def _do_classdesc(self, type_code):
+ # type: (int) -> JavaClassDesc
+ """
+ Parses a class description
+ """
+ if type_code == TerminalCode.TC_CLASSDESC:
+ # Do the real job
+ name = self.__reader.read_UTF()
+ serial_version_uid = self.__reader.read_long()
+ handle = self._new_handle()
+ desc_flags = self.__reader.read_byte()
+ nb_fields = self.__reader.read_short()
+
+ if nb_fields < 0:
+ raise ValueError("Invalid field count: {0}".format(nb_fields))
+
+ fields = [] # type: List[JavaField]
+ for _ in range(nb_fields):
+ field_type = self.__reader.read_byte()
+ field_name = self.__reader.read_UTF()
+ class_name = None
+
+ if field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+ # String type code
+ str_type_code = self.__reader.read_byte()
+ class_name = self._read_new_string(str_type_code)
+ elif field_type not in PRIMITIVE_TYPES:
+ raise ValueError(
+ "Invalid field type char: 0x{0:x}".format(field_type)
+ )
+
+ fields.append(
+ JavaField(FieldType(field_type), field_name, class_name)
+ )
+
+ # Setup the class description bean
+ class_desc = JavaClassDesc(ClassDescType.NORMALCLASS)
+ class_desc.name = name
+ class_desc.serial_version_uid = serial_version_uid
+ class_desc.handle = handle
+ class_desc.desc_flags = desc_flags
+ class_desc.fields = fields
+ class_desc.annotations = self._read_class_annotations(class_desc)
+ class_desc.super_class = self._read_classdesc()
+
+ if class_desc.super_class:
+ class_desc.super_class.is_super_class = True
+
+ # Store the reference to the parsed bean
+ self._set_handle(handle, class_desc)
+ return class_desc
+ elif type_code == TerminalCode.TC_NULL:
+ # Null reference
+ return None
+ elif type_code == TerminalCode.TC_REFERENCE:
+ # Reference to an already loading class description
+ previous = self._do_reference()
+ if not isinstance(previous, JavaClassDesc):
+ raise ValueError(
+ "Referenced object is not a class description"
+ )
+ return previous
+ elif type_code == TerminalCode.TC_PROXYCLASSDESC:
+ # Proxy class description
+ handle = self._new_handle()
+ nb_interfaces = self.__reader.read_int()
+ interfaces = [
+ self.__reader.read_UTF() for _ in range(nb_interfaces)
+ ]
+
+ class_desc = JavaClassDesc(ClassDescType.PROXYCLASS)
+ class_desc.handle = handle
+ class_desc.interfaces = interfaces
+ class_desc.annotations = self._read_class_annotations()
+ class_desc.super_class = self._read_classdesc()
+
+ if class_desc.super_class:
+ class_desc.super_class.is_super_class = True
+
+ # Store the reference to the parsed bean
+ self._set_handle(handle, class_desc)
+ return class_desc
+
+ raise ValueError("Expected a valid class description starter")
+
+ def _custom_readObject(self, class_name):
+ # type: (str) -> ParsedJavaContent
+ """
+ Reads an object with a custom serialization process
+
+ :param class_name: Name of the class to load
+ :return: The parsed object
+ :raise ValueError: Unknown kind of class
+ """
+ self.__fd.seek(-1, os.SEEK_CUR)
+ for transformer in self.__transformers:
+ class_data = transformer.load_custom_writeObject(
+ self, self.__reader, class_name
+ )
+ if class_data:
+ return class_data
+
+ raise ValueError("Custom readObject can not be processed")
+
+ def _read_class_annotations(self, class_desc=None):
+ # type: (Optional[JavaClassDesc]) -> List[ParsedJavaContent]
+ """
+ Reads the annotations associated to a class
+ """
+ contents = [] # type: List[ParsedJavaContent]
+ while True:
+ type_code = self.__reader.read_byte()
+ if type_code == TerminalCode.TC_ENDBLOCKDATA:
+ # We're done here
+ return contents
+ elif type_code == TerminalCode.TC_RESET:
+ # Reset references
+ self._reset()
+ continue
+
+ java_object = self._read_content(type_code, True, class_desc)
+
+ if java_object is not None and java_object.is_exception:
+ # Found an exception: raise it
+ raise ExceptionRead(java_object)
+
+ contents.append(java_object)
+
+ raise Exception("Class annotation reading stopped before end")
+
+ def _create_instance(self, class_desc):
+ # type: (JavaClassDesc) -> JavaInstance
+ """
+ Creates a JavaInstance object, by a transformer if possible
+ """
+ # Try to create the transformed object
+ for transformer in self.__transformers:
+ instance = transformer.create_instance(class_desc)
+ if instance is not None:
+ if class_desc.name:
+ instance.is_external_instance = not self._is_default_supported(
+ class_desc.name
+ )
+ return instance
+
+ return JavaInstance()
+
+ def _do_object(self, type_code=0):
+ # type: (int) -> JavaInstance
+ """
+ Parses an object
+ """
+ # Parse the object class description
+ class_desc = self._read_classdesc()
+
+ # Assign a new handle
+ handle = self._new_handle()
+ self._log.debug(
+ "Reading new object: handle %x, classdesc %s", handle, class_desc
+ )
+
+ # Prepare the instance object
+ instance = self._create_instance(class_desc)
+ instance.classdesc = class_desc
+ instance.handle = handle
+
+ # Store the instance
+ self._set_handle(handle, instance)
+
+ # Read the instance content
+ self._read_class_data(instance)
+ self._log.debug("Done reading object handle %x", handle)
+ return instance
+
+ def _is_default_supported(self, class_name):
+ # type: (str) -> bool
+ """
+ Checks if this class is supported by the default object transformer
+ """
+ default_transf = [
+ x
+ for x in self.__transformers
+ if isinstance(x, DefaultObjectTransformer)
+ ]
+ return (
+ bool(default_transf)
+ and class_name in default_transf[0]._type_mapper
+ )
+
+ def _read_class_data(self, instance):
+ # type: (JavaInstance) -> None
+ """
+ Reads the content of an instance
+ """
+ # Read the class hierarchy
+ classes = [] # type: List[JavaClassDesc]
+ instance.classdesc.get_hierarchy(classes)
+
+ all_data = {} # type: Dict[JavaClassDesc, Dict[JavaField, Any]]
+ annotations = {} # type: Dict[JavaClassDesc, List[ParsedJavaContent]]
+
+ for cd in classes:
+ values = {} # type: Dict[JavaField, Any]
+ cd.validate()
+ if (
+ cd.data_type == ClassDataType.NOWRCLASS
+ or cd.data_type == ClassDataType.WRCLASS
+ ):
+ if (
+ cd.data_type == ClassDataType.WRCLASS
+ and instance.is_external_instance
+ ):
+ annotations[cd] = self._read_class_annotations(cd)
+ else:
+ for field in cd.fields:
+ values[field] = self._read_field_value(field.type)
+ all_data[cd] = values
+
+ if cd.data_type == ClassDataType.WRCLASS:
+ annotations[cd] = self._read_class_annotations(cd)
+ else:
+ if cd.data_type == ClassDataType.OBJECT_ANNOTATION:
+ # Call the transformer if possible
+ if not instance.load_from_blockdata(self, self.__reader):
+ # Can't read :/
+ raise ValueError(
+ "hit externalizable with nonzero SC_BLOCK_DATA; "
+ "can't interpret data"
+ )
+ annotations[cd] = self._read_class_annotations(cd)
+
+ # Fill the instance object
+ instance.annotations = annotations
+ instance.field_data = all_data
+
+ # Load transformation from the fields and annotations
+ instance.load_from_instance()
+
+    def _read_field_value(self, field_type):
+        # type: (FieldType) -> Any
+        """
+        Reads the value of an instance field, according to its declared type
+        """
+        if field_type == FieldType.BYTE:
+            return self.__reader.read_byte()
+        if field_type == FieldType.CHAR:
+            return self.__reader.read_char()
+        if field_type == FieldType.DOUBLE:
+            return self.__reader.read_double()
+        if field_type == FieldType.FLOAT:
+            return self.__reader.read_float()
+        if field_type == FieldType.INTEGER:
+            return self.__reader.read_int()
+        if field_type == FieldType.LONG:
+            return self.__reader.read_long()
+        if field_type == FieldType.SHORT:
+            return self.__reader.read_short()
+        if field_type == FieldType.BOOLEAN:
+            return self.__reader.read_bool()
+        if field_type in (FieldType.OBJECT, FieldType.ARRAY):
+            sub_type_code = self.__reader.read_byte()
+            if field_type == FieldType.ARRAY:
+                if sub_type_code == TerminalCode.TC_NULL:
+                    # Seems required, according to issue #46
+                    return None
+                if sub_type_code == TerminalCode.TC_REFERENCE:
+                    # Back-reference to an already parsed array (not a class)
+                    return self._do_reference(sub_type_code)
+                if sub_type_code != TerminalCode.TC_ARRAY:
+                    raise ValueError(
+                        "Array type listed, but type code != TC_ARRAY"
+                    )
+
+            content = self._read_content(sub_type_code, False)
+            if content is not None and content.is_exception:
+                raise ExceptionRead(content)
+            return content
+
+        raise ValueError("Can't process type: {0}".format(field_type))
+
+    def _do_reference(self, type_code=0):
+        # type: (int) -> ParsedJavaContent
+        """
+        Returns the content previously stored for the handle read from the
+        stream
+
+        :param type_code: Type code of the content (not used here)
+        :return: The content associated to the handle
+        :raise ValueError: Unknown handle
+        """
+        handle = self.__reader.read_int()
+        try:
+            known_content = self.__handles[handle]
+        except KeyError:
+            raise ValueError("Invalid reference handle: {0:x}".format(handle))
+
+        return known_content
+
+    def _do_enum(self, type_code):
+        # type: (int) -> JavaEnum
+        """
+        Parses an enumeration constant
+
+        :param type_code: Type code of the content (not used here)
+        :return: The parsed enumeration
+        :raise ValueError: No class description for the enumeration
+        """
+        class_desc = self._read_classdesc()
+        if class_desc is None:
+            raise ValueError("Enum description can't be null")
+
+        # The handle is allocated before the name of the constant is read
+        enum_handle = self._new_handle()
+
+        # Read the enum string and note it in the class description
+        constant_name = self._read_new_string(self.__reader.read_byte())
+        class_desc.enum_constants.add(constant_name.value)
+
+        # Store the object
+        enum_obj = JavaEnum(enum_handle, class_desc, constant_name)
+        self._set_handle(enum_handle, enum_obj)
+        return enum_obj
+
+    def _do_class(self, type_code):
+        # type: (int) -> JavaClass
+        """
+        Parses a class object
+
+        :param type_code: Type code of the content (not used here)
+        :return: The parsed class, stored under a new handle
+        """
+        class_desc = self._read_classdesc()
+        class_handle = self._new_handle()
+
+        # Store the class object under its handle
+        parsed_class = JavaClass(class_handle, class_desc)
+        self._set_handle(class_handle, parsed_class)
+        return parsed_class
+
+    def _do_array(self, type_code):
+        # type: (int) -> JavaArray
+        """
+        Parses an array
+
+        :param type_code: Type code of the content (not used here)
+        :return: The parsed array
+        :raise ValueError: Missing or invalid class description, or negative
+                           array size
+        """
+        cd = self._read_classdesc()
+        handle = self._new_handle()
+        if cd is None:
+            # Fail explicitly (like _do_enum) instead of raising an
+            # AttributeError when accessing cd.name
+            raise ValueError("Array class description can't be null")
+
+        if not cd.name or len(cd.name) < 2:
+            raise ValueError("Invalid name in array class description")
+
+        # The type of the items is given by the 2nd character of the name
+        content_type_byte = ord(cd.name[1].encode("latin1"))
+        field_type = FieldType(content_type_byte)
+
+        # Array size
+        size = self.__reader.read_int()
+        if size < 0:
+            raise ValueError("Invalid array size")
+
+        # Array content: the first transformer returning something wins
+        for transformer in self.__transformers:
+            content = transformer.load_array(
+                self.__reader, field_type.type_code(), size
+            )
+            if content is not None:
+                break
+        else:
+            # No transformer handled the array: read items one by one
+            content = [self._read_field_value(field_type) for _ in range(size)]
+
+        return JavaArray(handle, cd, field_type, content)
+
+    def _do_exception(self, type_code):
+        # type: (int) -> ParsedJavaContent
+        """
+        Reads the content of a thrown exception
+
+        The parser state is reset before and after the exception is read.
+
+        :param type_code: Type code of the content (not used here)
+        :return: The parsed instance, flagged as an exception
+        :raise ValueError: Unexpected kind of content
+        :raise ExceptionRead: The parsed instance was already flagged as an
+                              exception
+        """
+        # Start by resetting current state
+        self._reset()
+
+        content_code = self.__reader.read_byte()
+        if content_code == TerminalCode.TC_RESET:
+            raise ValueError("TC_RESET read while reading exception")
+
+        thrown = self._read_content(content_code, False)
+        if thrown is None:
+            raise ValueError("Null exception object")
+
+        if not isinstance(thrown, JavaInstance):
+            raise ValueError("Exception object is not an instance")
+
+        if thrown.is_exception:
+            raise ExceptionRead(thrown)
+
+        # Strange object ?
+        thrown.is_exception = True
+        self._reset()
+        return thrown
+
+    def _do_block_data(self, type_code):
+        # type: (int) -> BlockData
+        """
+        Reads a block data
+
+        :param type_code: TC_BLOCKDATA (size on an unsigned byte) or
+                          TC_BLOCKDATALONG (size on an int)
+        :return: The block data read from the stream
+        :raise ValueError: Invalid type code or block size
+        :raise EOFError: The stream ends before the end of the block
+        """
+        # Parse the size
+        if type_code == TerminalCode.TC_BLOCKDATA:
+            size = self.__reader.read_ubyte()
+        elif type_code == TerminalCode.TC_BLOCKDATALONG:
+            size = self.__reader.read_int()
+        else:
+            raise ValueError("Invalid type code for blockdata")
+
+        if size < 0:
+            raise ValueError("Invalid value for block data size")
+
+        # Read the block
+        data = self.__fd.read(size)
+        if len(data) != size:
+            # Don't return a truncated block silently
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        return BlockData(data)
diff --git a/javaobj/v2/main.py b/javaobj/v2/main.py
new file mode 100644
index 0000000..24b51b0
--- /dev/null
+++ b/javaobj/v2/main.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Mimics the core API with the new deserializer
+"""
+
+from __future__ import absolute_import
+
+from typing import IO, Any # pylint:disable=W0611
+
+try:
+ # Python 2
+ from StringIO import StringIO as BytesIO
+except ImportError:
+ # Python 3+
+ from io import BytesIO
+
+from ..utils import java_data_fd
+from .api import ObjectTransformer # pylint:disable=W0611
+from .core import JavaStreamParser
+from .transformers import DefaultObjectTransformer, NumpyArrayTransformer
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+def load(file_object, *transformers, **kwargs):
+    # type: (IO[bytes], ObjectTransformer, Any) -> Any
+    """
+    Deserializes Java primitive data and objects serialized using
+    ObjectOutputStream from a file-like object.
+
+    A ``DefaultObjectTransformer`` is added after the given transformers
+    if none of them is an instance of that class.
+
+    :param file_object: A file-like object
+    :param transformers: Custom transformers to use
+    :param use_numpy_arrays: (keyword) If True, a ``NumpyArrayTransformer``
+                             is added after the other transformers
+    :return: None if nothing was parsed, the parsed object if there is only
+             one, else the whole result of the parser
+    """
+    # Check file format (uncompress if necessary)
+    stream = java_data_fd(file_object)
+
+    # Ensure we have the default object transformer
+    all_transformers = list(transformers)
+    has_default = any(
+        isinstance(transformer, DefaultObjectTransformer)
+        for transformer in all_transformers
+    )
+    if not has_default:
+        all_transformers.append(DefaultObjectTransformer())
+
+    # Use the numpy array transformer if requested
+    if kwargs.get("use_numpy_arrays", False):
+        all_transformers.append(NumpyArrayTransformer())
+
+    # Parse the object(s)
+    contents = JavaStreamParser(stream, all_transformers).run()
+
+    count = len(contents)
+    if count == 0:
+        # Nothing was parsed, but no error
+        return None
+
+    if count == 1:
+        # Return the only object as is
+        return contents[0]
+
+    # Returns all objects if they are more than one
+    return contents
+
+
+def loads(data, *transformers, **kwargs):
+    # type: (bytes, ObjectTransformer, Any) -> Any
+    """
+    Deserializes Java objects and primitive data serialized using
+    ObjectOutputStream from bytes.
+
+    The data is wrapped in an in-memory stream and given to :func:`load`
+    with the transformers and keyword arguments, unchanged.
+
+    :param data: A Java data string
+    :param transformers: Custom transformers to use
+    :param ignore_remaining_data: If True, don't log an error when unused
+                                  trailing bytes are remaining
+                                  (NOTE(review): forwarded as is to load(),
+                                  which doesn't seem to read it - confirm)
+    :return: The deserialized object
+    """
+    stream = BytesIO(data)
+    return load(stream, *transformers, **kwargs)
diff --git a/javaobj/v2/stream.py b/javaobj/v2/stream.py
new file mode 100644
index 0000000..7cb8a9f
--- /dev/null
+++ b/javaobj/v2/stream.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""
+Utility module to handle streams like in Java
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+import struct
+from typing import IO, Any, Tuple # pylint:disable=W0611
+
+from ..modifiedutf8 import decode_modified_utf8
+from ..utils import UNICODE_TYPE, unicode_char # pylint:disable=W0611
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class DataStreamReader:
+    """
+    Reads the given file object with object input stream-like methods
+
+    All values are unpacked in big-endian order with the ``struct`` module.
+    """
+
+    def __init__(self, fd):
+        # type: (IO[bytes]) -> None
+        """
+        :param fd: The input stream
+        """
+        self.__fd = fd
+
+    @property
+    def file_descriptor(self):
+        # type: () -> IO[bytes]
+        """
+        The underlying file descriptor
+        """
+        return self.__fd
+
+    def read(self, struct_format):
+        # type: (str) -> Tuple[Any, ...]
+        """
+        Reads from the input stream, using struct
+
+        :param struct_format: An unpack format string
+        :return: The result of struct.unpack (tuple)
+        :raise EOFError: End of stream reached during unpacking
+        """
+        length = struct.calcsize(struct_format)
+        bytes_array = self.__fd.read(length)
+
+        if len(bytes_array) != length:
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        return struct.unpack(struct_format, bytes_array)
+
+    def read_bool(self):
+        # type: () -> bool
+        """
+        Shortcut to read a single `boolean` (1 byte)
+        """
+        return bool(self.read(">B")[0])
+
+    def read_byte(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `byte` (1 byte)
+        """
+        return self.read(">b")[0]
+
+    def read_ubyte(self):
+        # type: () -> int
+        """
+        Shortcut to read an unsigned `byte` (1 byte)
+        """
+        return self.read(">B")[0]
+
+    def read_char(self):
+        # type: () -> UNICODE_TYPE
+        """
+        Shortcut to read a single `char` (2 bytes)
+        """
+        return unicode_char(self.read(">H")[0])
+
+    def read_short(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `short` (2 bytes)
+        """
+        return self.read(">h")[0]
+
+    def read_ushort(self):
+        # type: () -> int
+        """
+        Shortcut to read an unsigned `short` (2 bytes)
+        """
+        return self.read(">H")[0]
+
+    def read_int(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `int` (4 bytes)
+        """
+        return self.read(">i")[0]
+
+    def read_float(self):
+        # type: () -> float
+        """
+        Shortcut to read a single `float` (4 bytes)
+        """
+        return self.read(">f")[0]
+
+    def read_long(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `long` (8 bytes)
+        """
+        return self.read(">q")[0]
+
+    def read_double(self):
+        # type: () -> float
+        """
+        Shortcut to read a single `double` (8 bytes)
+        """
+        return self.read(">d")[0]
+
+    def read_UTF(self):  # pylint:disable=C0103
+        # type: () -> str
+        """
+        Reads a Java string: its length in bytes (unsigned short), then
+        its content, decoded as modified UTF-8
+
+        :return: The decoded string
+        :raise EOFError: End of stream reached before the end of the string
+        """
+        length = self.read_ushort()
+        ba = self.__fd.read(length)
+        if len(ba) != length:
+            # Same behaviour as read(): don't decode a truncated string
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        return decode_modified_utf8(ba)[0]
diff --git a/javaobj/v2/transformers.py b/javaobj/v2/transformers.py
new file mode 100644
index 0000000..087eea9
--- /dev/null
+++ b/javaobj/v2/transformers.py
@@ -0,0 +1,534 @@
+#!/usr/bin/env python3
+"""
+Defines the default object transformers
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+# Standard library
+import functools
+from typing import List, Optional, Tuple
+
+# Numpy (optional)
+try:
+ import numpy
+except ImportError:
+ numpy = None # type: ignore
+
+# Javaobj
+from ..constants import TerminalCode, TypeCode
+from ..utils import log_debug, log_error, read_string, read_struct, to_bytes
+from .api import IJavaStreamParser, ObjectTransformer
+from .beans import ( # pylint:disable=W0611
+ BlockData,
+ JavaClassDesc,
+ JavaInstance,
+)
+from .stream import DataStreamReader
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaList(list, JavaInstance):
+    """
+    Python-Java list bridge type
+    """
+
+    HANDLED_CLASSES = ("java.util.ArrayList", "java.util.LinkedList")
+
+    def __init__(self):
+        list.__init__(self)
+        JavaInstance.__init__(self)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        :param indent: Not used here
+        :return: True if the annotations of a handled class were found
+        """
+        # Lists have their content in their annotations
+        for class_desc, class_annotations in self.annotations.items():
+            if class_desc.name not in self.HANDLED_CLASSES:
+                continue
+
+            # The first annotation is not part of the list content
+            self.extend(class_annotations[1:])
+            return True
+
+        return False
+
+
+@functools.total_ordering
+class JavaPrimitiveClass(JavaInstance):
+    """
+    Parent of Java classes matching a primitive (Bool, Integer, Long, ...)
+
+    String conversion, hash and comparisons are delegated to the wrapped
+    value.
+    """
+
+    def __init__(self):
+        JavaInstance.__init__(self)
+        # Wrapped value, set by load_from_instance()
+        self.value = None
+
+    def __str__(self):
+        return str(self.value)
+
+    def __repr__(self):
+        return repr(self.value)
+
+    def __hash__(self):
+        return hash(self.value)
+
+    def __eq__(self, other):
+        return self.value == other
+
+    def __lt__(self, other):
+        return self.value < other
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        :param indent: Not used here
+        :return: True if a field named "value" was found
+        """
+        for class_fields in self.field_data.values():
+            for field, field_value in class_fields.items():
+                if field.name != "value":
+                    continue
+
+                self.value = field_value
+                return True
+
+        return False
+
+
+class JavaBool(JavaPrimitiveClass):
+    """
+    Represents a Java Boolean object
+    """
+
+    HANDLED_CLASSES = "java.lang.Boolean"
+
+    def __bool__(self):
+        # __bool__ must return a real bool, but the value is None until
+        # load_from_instance() has found it
+        return bool(self.value)
+
+    # Python 2 looks for __nonzero__ instead of __bool__
+    __nonzero__ = __bool__
+
+
+class JavaInt(JavaPrimitiveClass):
+    """
+    Represents a Java Integer or Long object
+
+    ``int()`` returns the wrapped value as is.
+    """
+
+    HANDLED_CLASSES = ("java.lang.Integer", "java.lang.Long")
+
+    def __int__(self):
+        # The value is None until load_from_instance() has found it
+        return self.value
+
+
+class JavaMap(dict, JavaInstance):
+    """
+    Python-Java dictionary/map bridge type
+    """
+
+    HANDLED_CLASSES = (
+        "java.util.HashMap",
+        "java.util.TreeMap",
+    )  # type: Tuple[str, ...]
+
+    def __init__(self):
+        dict.__init__(self)
+        JavaInstance.__init__(self)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        :param indent: Not used here
+        :return: True if the annotations of a handled class were found
+        """
+        # Maps have their content in their annotations
+        for class_desc, class_annotations in self.annotations.items():
+            if class_desc.name not in JavaMap.HANDLED_CLASSES:
+                continue
+
+            # Walk the annotations 2 by 2 (key, then value), skipping the
+            # first one; a trailing key without value is ignored
+            items = iter(class_annotations[1:])
+            for key, value in zip(items, items):
+                self[key] = value
+
+            return True
+
+        return False
+
+
+class JavaLinkedHashMap(JavaMap):
+    """
+    Linked hash maps are loaded from a specific block data
+    """
+
+    HANDLED_CLASSES = ("java.util.LinkedHashMap",)
+
+    def load_from_blockdata(self, parser, reader, indent=0):
+        # type: (IJavaStreamParser, DataStreamReader, int) -> bool
+        """
+        Loads the content of the map, written with a custom implementation
+
+        :param parser: The stream parser, used to read keys and values
+        :param reader: The reader of the underlying stream
+        :param indent: Not used here
+        :return: Always True
+        :raise ValueError: Unexpected bytes after the map entries
+        """
+        # Read HashMap fields
+        self.buckets = reader.read_int()
+        self.size = reader.read_int()
+
+        # Read entries: type code and content of the key, then of the value
+        for _ in range(self.size):
+            key = parser._read_content(reader.read_byte(), True)
+            value = parser._read_content(reader.read_byte(), True)
+            self[key] = value
+
+        # The entries must be followed by the end of block data marker...
+        if reader.read_byte() != TerminalCode.TC_ENDBLOCKDATA:
+            raise ValueError("Didn't find the end of block data")
+
+        # ... then by a trailing 0
+        final_byte = reader.read_byte()
+        if final_byte != 0:
+            raise ValueError("Should find 0x0, got {0:x}".format(final_byte))
+
+        return True
+
+
+class JavaSet(set, JavaInstance):
+    """
+    Python-Java set bridge type
+    """
+
+    HANDLED_CLASSES = (
+        "java.util.HashSet",
+        "java.util.LinkedHashSet",
+    )  # type: Tuple[str, ...]
+
+    def __init__(self):
+        set.__init__(self)
+        JavaInstance.__init__(self)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        :param indent: Not used here
+        :return: True if the annotations of a handled class were found
+        """
+        # Sets have their content in their annotations
+        for class_desc, class_annotations in self.annotations.items():
+            if class_desc.name not in self.HANDLED_CLASSES:
+                continue
+
+            # The first annotation is not part of the set content
+            self.update(class_annotations[1:])
+            return True
+
+        return False
+
+
+class JavaTreeSet(JavaSet):
+    """
+    Tree sets are handled a bit differently
+    """
+
+    HANDLED_CLASSES = ("java.util.TreeSet",)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        :param indent: Not used here
+        :return: True if the annotations of a handled class were found
+        """
+        # Tree sets have their content in their annotations
+        for class_desc, class_annotations in self.annotations.items():
+            if class_desc.name not in self.HANDLED_CLASSES:
+                continue
+
+            # Annotation[1] == size of the set: content starts after it
+            self.update(class_annotations[2:])
+            return True
+
+        return False
+
+
+class JavaTime(JavaInstance):
+    """
+    Represents the classes found in the java.time package
+
+    The semantic of the fields depends on the type of time that has been
+    parsed
+    """
+
+    HANDLED_CLASSES = ("java.time.Ser",)  # type: Tuple[str, ...]
+
+    # Kinds of time, as read from the first byte of the block data
+    DURATION_TYPE = 1
+    INSTANT_TYPE = 2
+    LOCAL_DATE_TYPE = 3
+    LOCAL_TIME_TYPE = 4
+    LOCAL_DATE_TIME_TYPE = 5
+    ZONE_DATE_TIME_TYPE = 6
+    ZONE_REGION_TYPE = 7
+    ZONE_OFFSET_TYPE = 8
+    OFFSET_TIME_TYPE = 9
+    OFFSET_DATE_TIME_TYPE = 10
+    YEAR_TYPE = 11
+    YEAR_MONTH_TYPE = 12
+    MONTH_DAY_TYPE = 13
+    PERIOD_TYPE = 14
+
+    def __init__(self):
+        JavaInstance.__init__(self)
+        # Kind of time (-1 until the content is loaded)
+        self.type = -1
+        self.year = None
+        self.month = None
+        self.day = None
+        self.hour = None
+        self.minute = None
+        self.second = None
+        self.nano = None
+        self.offset = None
+        self.zone = None
+
+        # Kind of time -> method parsing the matching content
+        self.time_handlers = {
+            self.DURATION_TYPE: self.do_duration,
+            self.INSTANT_TYPE: self.do_instant,
+            self.LOCAL_DATE_TYPE: self.do_local_date,
+            self.LOCAL_DATE_TIME_TYPE: self.do_local_date_time,
+            self.LOCAL_TIME_TYPE: self.do_local_time,
+            self.ZONE_DATE_TIME_TYPE: self.do_zoned_date_time,
+            self.ZONE_OFFSET_TYPE: self.do_zone_offset,
+            self.ZONE_REGION_TYPE: self.do_zone_region,
+            self.OFFSET_TIME_TYPE: self.do_offset_time,
+            self.OFFSET_DATE_TIME_TYPE: self.do_offset_date_time,
+            self.YEAR_TYPE: self.do_year,
+            self.YEAR_MONTH_TYPE: self.do_year_month,
+            self.MONTH_DAY_TYPE: self.do_month_day,
+            self.PERIOD_TYPE: self.do_period,
+        }
+
+    def __str__(self):
+        # The type is printed in hexadecimal, to match its "0x" prefix
+        return (
+            "JavaTime(type=0x{s.type:x}, "
+            "year={s.year}, month={s.month}, day={s.day}, "
+            "hour={s.hour}, minute={s.minute}, second={s.second}, "
+            "nano={s.nano}, offset={s.offset}, zone={s.zone})"
+        ).format(s=self)
+
+    def load_from_blockdata(self, parser, reader, indent=0):
+        """
+        Ignore the SC_BLOCK_DATA flag
+        """
+        return True
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        The first annotation of the handled class must be a block data: its
+        first byte gives the kind of time, which selects the method parsing
+        the following bytes. An unknown kind is logged as an error.
+
+        :param indent: Not used here
+        :return: True if the annotations of a handled class were found
+        :raise ValueError: The first annotation is not a block data
+        """
+        # Times have their content in their annotations
+        for cd, annotations in self.annotations.items():
+            if cd.name in self.HANDLED_CLASSES:
+                if not annotations or not isinstance(
+                    annotations[0], BlockData
+                ):
+                    raise ValueError("Require a BlockData as annotation")
+
+                # Convert back annotations to bytes
+                # latin-1 is used to ensure that bytes are kept as is
+                content = to_bytes(annotations[0].data, "latin1")
+                (self.type,), content = read_struct(content, ">b")
+
+                # Look for the handler first, so that a KeyError raised
+                # while parsing is not mistaken for an unknown kind of time
+                handler = self.time_handlers.get(self.type)
+                if handler is None:
+                    log_error(
+                        "Unhandled kind of time: {}".format(self.type)
+                    )
+                else:
+                    handler(content)
+
+                return True
+
+        return False
+
+    def do_duration(self, data):
+        """
+        Reads the seconds (long) and nanoseconds (int) of a duration
+
+        :return: The remaining data
+        """
+        (self.second, self.nano), data = read_struct(data, ">qi")
+        return data
+
+    def do_instant(self, data):
+        """
+        Reads the seconds (long) and nanoseconds (int) of an instant
+
+        :return: The remaining data
+        """
+        (self.second, self.nano), data = read_struct(data, ">qi")
+        return data
+
+    def do_local_date(self, data):
+        """
+        Reads a year (int), a month (byte) and a day (byte)
+
+        :return: The remaining data
+        """
+        (self.year, self.month, self.day), data = read_struct(data, ">ibb")
+        return data
+
+    def do_local_time(self, data):
+        """
+        Reads a time of day, stored in a compact form: a negative byte
+        holds the bitwise complement of the last stored value, the
+        following fields being left to 0
+
+        :return: The remaining data
+        """
+        (hour,), data = read_struct(data, ">b")
+        minute = 0
+        second = 0
+        nano = 0
+
+        if hour < 0:
+            hour = ~hour
+        else:
+            (minute,), data = read_struct(data, ">b")
+            if minute < 0:
+                minute = ~minute
+            else:
+                (second,), data = read_struct(data, ">b")
+                if second < 0:
+                    second = ~second
+                else:
+                    (nano,), data = read_struct(data, ">i")
+
+        self.hour = hour
+        self.minute = minute
+        self.second = second
+        self.nano = nano
+        return data
+
+    def do_local_date_time(self, data):
+        """
+        Reads a local date, then a local time
+
+        :return: The remaining data
+        """
+        data = self.do_local_date(data)
+        data = self.do_local_time(data)
+        return data
+
+    def do_zoned_date_time(self, data):
+        """
+        Reads a local date and time, a zone offset, then a zone region
+
+        :return: The remaining data
+        """
+        data = self.do_local_date_time(data)
+        data = self.do_zone_offset(data)
+        data = self.do_zone_region(data)
+        return data
+
+    def do_zone_offset(self, data):
+        """
+        Reads a zone offset: a byte counting units of 900, or the marker
+        127 followed by the offset itself, as an int
+
+        :return: The remaining data
+        """
+        (offset_byte,), data = read_struct(data, ">b")
+        if offset_byte == 127:
+            (self.offset,), data = read_struct(data, ">i")
+        else:
+            self.offset = offset_byte * 900
+        return data
+
+    def do_zone_region(self, data):
+        """
+        Reads the zone, as a string
+
+        :return: The remaining data
+        """
+        self.zone, data = read_string(data)
+        return data
+
+    def do_offset_time(self, data):
+        """
+        Reads a local time, then a zone offset
+
+        :return: The remaining data
+        """
+        data = self.do_local_time(data)
+        data = self.do_zone_offset(data)
+        return data
+
+    def do_offset_date_time(self, data):
+        """
+        Reads a local date and time, then a zone offset
+
+        :return: The remaining data
+        """
+        data = self.do_local_date_time(data)
+        data = self.do_zone_offset(data)
+        return data
+
+    def do_year(self, data):
+        """
+        Reads a year (int)
+
+        :return: The remaining data
+        """
+        (self.year,), data = read_struct(data, ">i")
+        return data
+
+    def do_year_month(self, data):
+        """
+        Reads a year (int) and a month (byte)
+
+        :return: The remaining data
+        """
+        (self.year, self.month), data = read_struct(data, ">ib")
+        return data
+
+    def do_month_day(self, data):
+        """
+        Reads a month (byte) and a day (byte)
+
+        :return: The remaining data
+        """
+        (self.month, self.day), data = read_struct(data, ">bb")
+        return data
+
+    def do_period(self, data):
+        """
+        Reads a period: 3 ints stored as year, month and day
+
+        :return: The remaining data
+        """
+        (self.year, self.month, self.day), data = read_struct(data, ">iii")
+        return data
+
+
+class DefaultObjectTransformer(ObjectTransformer):
+    """
+    Provider of the default object transformers
+    """
+
+    KNOWN_TRANSFORMERS = (
+        JavaBool,
+        JavaInt,
+        JavaList,
+        JavaMap,
+        JavaLinkedHashMap,
+        JavaSet,
+        JavaTreeSet,
+        JavaTime,
+    )
+
+    def __init__(self):
+        # Construct the link: Java class name -> Python transformer
+        self._type_mapper = {}
+        for transformer_class in self.KNOWN_TRANSFORMERS:
+            class_names = transformer_class.HANDLED_CLASSES
+            if isinstance(class_names, str):
+                # Single class handled: treat it as a 1-item tuple
+                class_names = (class_names,)
+
+            for class_name in class_names:
+                self._type_mapper[class_name] = transformer_class
+
+    def create_instance(self, classdesc):
+        # type: (JavaClassDesc) -> Optional[JavaInstance]
+        """
+        Creates the Python object matching the given Java class
+
+        :param classdesc: The description of a Java class
+        :return: A new instance of the Python class associated to the Java
+                 class, or None if the class is not handled
+        """
+        mapped_type = self._type_mapper.get(classdesc.name)
+        if mapped_type is None:
+            # Return None if not handled
+            return None
+
+        log_debug("---")
+        log_debug(classdesc.name)
+        log_debug("---")
+
+        java_object = mapped_type()
+        java_object.classdesc = classdesc
+
+        log_debug(">>> java_object: {0}".format(java_object))
+        return java_object
+
+
+class NumpyArrayTransformer(ObjectTransformer):
+    """
+    Loads arrays as numpy arrays if possible
+    """
+
+    # Conversion of a Java type char to its NumPy equivalent
+    NUMPY_TYPE_MAP = {
+        # NOTE(review): Java bytes are signed, but "B" is unsigned; kept as
+        # is, to be confirmed against the expected results
+        TypeCode.TYPE_BYTE: "B",
+        # A Java char is a 16 bits unsigned value
+        TypeCode.TYPE_CHAR: ">H",
+        TypeCode.TYPE_DOUBLE: ">d",
+        TypeCode.TYPE_FLOAT: ">f",
+        TypeCode.TYPE_INTEGER: ">i",
+        # A Java long is always 64 bits, whereas "l" is a C long, whose
+        # size depends on the platform
+        TypeCode.TYPE_LONG: ">q",
+        TypeCode.TYPE_SHORT: ">h",
+        TypeCode.TYPE_BOOLEAN: ">B",
+    }
+
+    def load_array(self, reader, type_code, size):
+        # type: (DataStreamReader, TypeCode, int) -> Optional[list]
+        """
+        Loads a Java array, if possible
+
+        :param reader: The reader of the underlying stream
+        :param type_code: Type code of the items of the array
+        :param size: Number of items in the array
+        :return: A numpy array, or None if numpy is not available or if the
+                 type of items is not handled
+        :raise EOFError: The stream ends before the end of the array
+        """
+        if numpy is None:
+            return None
+
+        try:
+            type_format = self.NUMPY_TYPE_MAP[type_code]
+        except KeyError:
+            # Unhandled data type
+            return None
+
+        dtype = numpy.dtype(type_format)
+
+        # Read through the file object instead of numpy.fromfile(): the
+        # latter requires a real OS-level file, which excludes in-memory
+        # and uncompressing streams
+        expected_length = size * dtype.itemsize
+        raw_data = reader.file_descriptor.read(expected_length)
+        if len(raw_data) != expected_length:
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        # Copy the result, as arrays created from bytes are read-only
+        return numpy.frombuffer(raw_data, dtype=dtype).copy()
diff --git a/manifest.in b/manifest.in
new file mode 100644
index 0000000..cf4e570
--- /dev/null
+++ b/manifest.in
@@ -0,0 +1,8 @@
+# Include the README
+include README.md
+
+# Include the authors file
+include AUTHORS
+
+# Include the license file
+include LICENSE
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8789351
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,58 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["javaobj"]
+
+[project]
+name = "javaobj-py3"
+version = "0.4.4"
+description = "Module for serializing and de-serializing Java objects."
+readme = "README.md"
+license = "Apache-2.0"
+authors = [
+ { name = "Volodymyr Buell", email = "vbuell@gmail.com" }
+]
+maintainers = [
+ { name = "Thomas Calmant", email = "thomas.calmant@gmail.com" }
+]
+keywords = ["python", "java", "marshalling", "serialization"]
+classifiers = [
+ "Development Status :: 3 - Alpha",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Software Development :: Libraries :: Python Modules"
+]
+
+dependencies = [
+ "enum34; python_version<='3.4'",
+ "typing; python_version<='3.4'"
+]
+
+[project.optional-dependencies]
+test = ["pytest"]
+
+[project.urls]
+Homepage = "https://github.com/tcalmant/python-javaobj"
+Issues = "http://github.com/tcalmant/python-javaobj/issues"
+Source = "http://github.com/tcalmant/python-javaobj/"
+
+[tool.hatch.envs.test]
+dependencies = ["pytest"]
+
+[tool.hatch.envs.test.scripts]
+run = "pytest tests"
+
+[tool.black]
+line-length = 79
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..17b0412
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+enum34;python_version<="3.4"
+typing;python_version<="3.4"
diff --git a/setup.py b/setup.py
index dbe219a..cf93fb8 100644
--- a/setup.py
+++ b/setup.py
@@ -7,12 +7,12 @@
:authors: Volodymyr Buell, Thomas Calmant
:license: Apache License 2.0
-:version: 0.2.4
+:version: 0.4.4
:status: Alpha
..
- Copyright 2016 Thomas Calmant
+ Copyright 2024 Thomas Calmant
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@
# ------------------------------------------------------------------------------
# Module version
-__version_info__ = (0, 2, 4)
+__version_info__ = (0, 4, 4)
__version__ = ".".join(str(x) for x in __version_info__)
# Documentation strings format
@@ -53,8 +53,10 @@ def read(fname):
with open(os.path.join(os.path.dirname(__file__), fname)) as fd:
return fd.read()
+
# ------------------------------------------------------------------------------
+
setup(
name="javaobj-py3",
version=__version__,
@@ -64,18 +66,31 @@ def read(fname):
maintainer_email="thomas.calmant@gmail.com",
url="https://github.com/tcalmant/python-javaobj",
description="Module for serializing and de-serializing Java objects.",
- license='Apache License 2.0',
+ license="Apache License 2.0",
+ license_file="LICENSE",
keywords="python java marshalling serialization",
- py_modules=['javaobj'],
- test_suite="tests.tests",
- long_description=read('README.rst'),
+ packages=["javaobj", "javaobj.v1", "javaobj.v2"],
+ test_suite="tests",
+ install_requires=[
+ 'enum34;python_version<="3.4"',
+ 'typing;python_version<="3.4"',
+ ],
+ long_description=read("README.md"),
+ long_description_content_type="text/markdown",
classifiers=[
- "Development Status :: 3 - Alpha",
- "License :: OSI Approved :: Apache Software License",
- 'Operating System :: OS Independent',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- "Topic :: Software Development :: Libraries :: Python Modules",
- ])
+ "Development Status :: 3 - Alpha",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+ ],
+)
diff --git a/tests/java/.classpath b/tests/java/.classpath
deleted file mode 100644
index 61c3fab..0000000
--- a/tests/java/.classpath
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-
-
-
-
diff --git a/tests/java/.gitignore b/tests/java/.gitignore
index 073063f..562f411 100644
--- a/tests/java/.gitignore
+++ b/tests/java/.gitignore
@@ -4,3 +4,8 @@ target/
# Generated files
*.ser
+# Project files
+.idea/
+.classpath
+.project
+.settings/
diff --git a/tests/java/pom.xml b/tests/java/pom.xml
index 12f7250..d4cc75d 100644
--- a/tests/java/pom.xml
+++ b/tests/java/pom.xml
@@ -9,14 +9,16 @@
UTF-8
+ 1.7
+ 1.7
junit
junit
- 4.9
+ 4.13.1
test
-
\ No newline at end of file
+
diff --git a/tests/java/src/test/java/OneTest.java b/tests/java/src/test/java/OneTest.java
index 7e2025f..7ffb10a 100644
--- a/tests/java/src/test/java/OneTest.java
+++ b/tests/java/src/test/java/OneTest.java
@@ -1,13 +1,29 @@
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.Hashtable;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
import java.util.Vector;
+import java.util.Random;
+import java.util.zip.GZIPOutputStream;
import javax.swing.JScrollPane;
import javax.swing.SwingUtilities;
@@ -154,10 +170,16 @@ private void writeObject(final ObjectOutputStream oos)
ObjectOutputStream oos;
+ /**
+ * Returns the name of the file where to serialize the test content
+ */
+ private String getTestFileName() {
+ return name.getMethodName() + ".ser";
+ }
+
@Before
public void setUp() throws Exception {
- oos = new ObjectOutputStream(fos = new FileOutputStream(
- name.getMethodName() + ".ser"));
+ oos = new ObjectOutputStream(fos = new FileOutputStream(getTestFileName()));
}
@Test
@@ -194,6 +216,60 @@ public void testChar() throws IOException {
public void testChars() throws IOException {
oos.writeChars("python-javaobj");
oos.close();
+
+ // Also compress the file
+ final String serializedFileName = getTestFileName();
+ final String gzippedFileName = serializedFileName + ".gz";
+
+ try (final GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(gzippedFileName))){
+ try (final FileInputStream in = new FileInputStream(serializedFileName)){
+ final byte[] buffer = new byte[1024];
+ int len;
+ while((len = in.read(buffer)) != -1){
+ out.write(buffer, 0, len);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCharArray() throws IOException {
+ char[] array = new char[] {
+ '\u0000', '\ud800',
+ '\u0001', '\udc00',
+ '\u0002', '\uffff',
+ '\u0003'
+ };
+ oos.writeObject(array);
+ oos.close();
+ }
+
+ @Test
+ public void test2DArray() throws IOException {
+ int[][] array = new int[][] {
+ new int[] {1, 2, 3},
+ new int[] {4, 5, 6},
+ };
+ oos.writeObject(array);
+ oos.close();
+ }
+
+    /**
+     * Serializes an array of Class objects
+     */
+    @Test
+    public void testClassArray() throws IOException {
+        Class<?>[] array = new Class<?>[] {
+            Integer.class,
+            ObjectOutputStream.class,
+            Exception.class,
+        };
+        oos.writeObject(array);
+        oos.close();
+    }
+
+ @Test
+ public void testJapan() throws IOException {
+ String stateOfJapan = "日本国";
+ oos.writeObject(stateOfJapan);
+ oos.close();
}
@Test
@@ -249,6 +325,81 @@ public void testSuper() throws Exception {
oos.flush();
}
+ @Test
+ public void testHashSet() throws Exception {
+ final Set set = new HashSet();
+ set.add(1);
+ set.add(2);
+ set.add(1);
+ set.add(42);
+ oos.writeObject(set);
+ oos.flush();
+ }
+
+ @Test
+ public void testLinkedHashSet() throws Exception {
+ final Set set = new LinkedHashSet();
+ set.add(1);
+ set.add(2);
+ set.add(1);
+ set.add(42);
+ oos.writeObject(set);
+ oos.flush();
+ }
+
+ @Test
+ public void testTreeSet() throws Exception {
+ final Set set = new TreeSet();
+ set.add(1);
+ set.add(2);
+ set.add(1);
+ set.add(42);
+ oos.writeObject(set);
+ oos.flush();
+ }
+
+ @Test
+ public void testTime() throws Exception {
+ oos.writeObject(new Object[] {
+ Duration.ofSeconds(10),
+ Instant.now(),
+ LocalDate.now(),
+ LocalTime.now(),
+ LocalDateTime.now(),
+ ZoneId.systemDefault(),
+ ZonedDateTime.now(),
+ });
+ oos.flush();
+ }
+
+ /**
+ * Tests the pull request #27 by @qistoph:
+ * Add support for java.lang.Bool, Integer and Long classes
+ */
+ @Test
+ public void testBoolIntLong() throws Exception {
+ Map hm1 = new HashMap();
+ hm1.put("key1", "value1");
+ hm1.put("key2", "value2");
+ hm1.put("int", 9);
+ hm1.put("int2", new Integer(10));
+ hm1.put("bool", true);
+ hm1.put("bool2", new Boolean(true));
+
+ oos.writeObject(hm1);
+ oos.flush();
+
+ Map hm2 = new HashMap();
+ hm2.put("subMap", hm1);
+
+ ObjectOutputStream oos2 = new ObjectOutputStream(new FileOutputStream(name.getMethodName() + "-2.ser"));
+ try {
+ oos2.writeObject(hm2);
+ } finally {
+ oos2.close();
+ }
+ }
+
@Test
public void testSwingObject() throws Exception {
@@ -281,115 +432,70 @@ public void windowClosing(final WindowEvent e) {
});
}
- // public void test_readObject() throws Exception {
- // String s = "HelloWorld";
- // oos.writeObject(s);
- // oos.close();
- // ois = new ObjectInputStream(new ByteArrayInputStream(bao.toByteArray()));
- // assertEquals("Read incorrect Object value", s, ois.readObject());
- // ois.close();
- //
- // // Regression for HARMONY-91
- // // dynamically create serialization byte array for the next hierarchy:
- // // - class A implements Serializable
- // // - class C extends A
- //
- // byte[] cName = C.class.getName().getBytes("UTF-8");
- // byte[] aName = A.class.getName().getBytes("UTF-8");
- //
- // ByteArrayOutputStream out = new ByteArrayOutputStream();
- //
- // byte[] begStream = new byte[] { (byte) 0xac, (byte) 0xed, // STREAM_MAGIC
- // (byte) 0x00, (byte) 0x05, // STREAM_VERSION
- // (byte) 0x73, // TC_OBJECT
- // (byte) 0x72, // TC_CLASSDESC
- // (byte) 0x00, // only first byte for C class name length
- // };
- //
- // out.write(begStream, 0, begStream.length);
- // out.write(cName.length); // second byte for C class name length
- // out.write(cName, 0, cName.length); // C class name
- //
- // byte[] midStream = new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0x00,
- // (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
- // (byte) 0x21, // serialVersionUID = 33L
- // (byte) 0x02, // flags
- // (byte) 0x00, (byte) 0x00, // fields : none
- // (byte) 0x78, // TC_ENDBLOCKDATA
- // (byte) 0x72, // Super class for C: TC_CLASSDESC for A class
- // (byte) 0x00, // only first byte for A class name length
- // };
- //
- // out.write(midStream, 0, midStream.length);
- // out.write(aName.length); // second byte for A class name length
- // out.write(aName, 0, aName.length); // A class name
- //
- // byte[] endStream = new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0x00,
- // (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
- // (byte) 0x0b, // serialVersionUID = 11L
- // (byte) 0x02, // flags
- // (byte) 0x00, (byte) 0x01, // fields
- //
- // (byte) 0x4c, // field description: type L (object)
- // (byte) 0x00, (byte) 0x04, // length
- // // field = 'name'
- // (byte) 0x6e, (byte) 0x61, (byte) 0x6d, (byte) 0x65,
- //
- // (byte) 0x74, // className1: TC_STRING
- // (byte) 0x00, (byte) 0x12, // length
- // //
- // (byte) 0x4c, (byte) 0x6a, (byte) 0x61, (byte) 0x76,
- // (byte) 0x61, (byte) 0x2f, (byte) 0x6c, (byte) 0x61,
- // (byte) 0x6e, (byte) 0x67, (byte) 0x2f, (byte) 0x53,
- // (byte) 0x74, (byte) 0x72, (byte) 0x69, (byte) 0x6e,
- // (byte) 0x67, (byte) 0x3b,
- //
- // (byte) 0x78, // TC_ENDBLOCKDATA
- // (byte) 0x70, // NULL super class for A class
- //
- // // classdata
- // (byte) 0x74, // TC_STRING
- // (byte) 0x00, (byte) 0x04, // length
- // (byte) 0x6e, (byte) 0x61, (byte) 0x6d, (byte) 0x65, // value
- // };
- //
- // out.write(endStream, 0, endStream.length);
- // out.flush();
- //
- // // read created serial. form
- // ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(
- // out.toByteArray()));
- // Object o = ois.readObject();
- // assertEquals(C.class, o.getClass());
- //
- // // Regression for HARMONY-846
- // assertNull(new ObjectInputStream() {}.readObject());
- // }
+ /**
+ * Tests the pull request #38 by @UruDev:
+ * Add support for custom writeObject
+ */
+ @Test
+ public void testCustomWriteObject() throws Exception {
+ CustomClass writer = new CustomClass();
+ writer.start(oos);
+ }
}
class SuperAaaa implements Serializable {
-
- /**
- *
- */
private static final long serialVersionUID = 1L;
public boolean bool = true;
public int integer = -1;
public String superString = "Super!!";
-
}
class TestConcrete extends SuperAaaa implements Serializable {
-
- /**
- *
- */
private static final long serialVersionUID = 1L;
public String childString = "Child!!";
TestConcrete() {
super();
}
+}
+
+//Custom writeObject section
+class CustomClass implements Serializable {
+ private static final long serialVersionUID = 1;
+
+ public void start(ObjectOutputStream out) throws Exception {
+ this.writeObject(out);
+ }
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ CustomWriter custom = new CustomWriter(42);
+ out.writeObject(custom);
+ out.flush();
+ }
+}
+
+class RandomChild extends Random {
+ private static final long serialVersionUID = 1;
+ private int num = 1;
+ private double doub = 4.5;
+
+ RandomChild(int seed) {
+ super(seed);
+ }
+}
+
+class CustomWriter implements Serializable {
+ protected RandomChild custom_obj = null;
+
+ CustomWriter(int seed) {
+ custom_obj = new RandomChild(seed);
+ }
+ private static final long serialVersionUID = 1;
+ private static final int CURRENT_SERIAL_VERSION = 0;
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ out.writeInt(CURRENT_SERIAL_VERSION);
+ out.writeObject(custom_obj);
+ }
}
diff --git a/tests/java/testChars.ser.gz b/tests/java/testChars.ser.gz
new file mode 100644
index 0000000..f1f9db1
Binary files /dev/null and b/tests/java/testChars.ser.gz differ
diff --git a/tests/test2DArray.ser b/tests/test2DArray.ser
new file mode 100644
index 0000000..d0f58dc
Binary files /dev/null and b/tests/test2DArray.ser differ
diff --git a/tests/testBoolIntLong-2.ser b/tests/testBoolIntLong-2.ser
new file mode 100644
index 0000000..aae4a2b
Binary files /dev/null and b/tests/testBoolIntLong-2.ser differ
diff --git a/tests/testBoolIntLong.ser b/tests/testBoolIntLong.ser
new file mode 100644
index 0000000..daa6bc1
Binary files /dev/null and b/tests/testBoolIntLong.ser differ
diff --git a/tests/testChars.ser.gz b/tests/testChars.ser.gz
new file mode 100644
index 0000000..f1f9db1
Binary files /dev/null and b/tests/testChars.ser.gz differ
diff --git a/tests/testClassArray.ser b/tests/testClassArray.ser
new file mode 100644
index 0000000..e5501ae
Binary files /dev/null and b/tests/testClassArray.ser differ
diff --git a/tests/testCustomWriteObject.ser b/tests/testCustomWriteObject.ser
new file mode 100644
index 0000000..72e77af
Binary files /dev/null and b/tests/testCustomWriteObject.ser differ
diff --git a/tests/testHashSet.ser b/tests/testHashSet.ser
new file mode 100644
index 0000000..85776fd
Binary files /dev/null and b/tests/testHashSet.ser differ
diff --git a/tests/testJapan.ser b/tests/testJapan.ser
new file mode 100644
index 0000000..2c3634a
Binary files /dev/null and b/tests/testJapan.ser differ
diff --git a/tests/testLinkedHashSet.ser b/tests/testLinkedHashSet.ser
new file mode 100644
index 0000000..449edd8
Binary files /dev/null and b/tests/testLinkedHashSet.ser differ
diff --git a/tests/testTime.ser b/tests/testTime.ser
new file mode 100644
index 0000000..92fe968
Binary files /dev/null and b/tests/testTime.ser differ
diff --git a/tests/testTreeSet.ser b/tests/testTreeSet.ser
new file mode 100644
index 0000000..2efdfa1
Binary files /dev/null and b/tests/testTreeSet.ser differ
diff --git a/tests/tests.py b/tests/test_v1.py
similarity index 62%
rename from tests/tests.py
rename to tests/test_v1.py
index cfe358f..162b2db 100644
--- a/tests/tests.py
+++ b/tests/test_v1.py
@@ -1,5 +1,5 @@
#!/usr/bin/python
-# -- Content-Encoding: UTF-8 --
+# -- Content-Encoding: utf-8 --
"""
Tests for javaobj
@@ -8,12 +8,12 @@
:authors: Volodymyr Buell, Thomas Calmant
:license: Apache License 2.0
-:version: 0.2.3
+:version: 0.4.4
:status: Alpha
..
- Copyright 2016 Thomas Calmant
+ Copyright 2024 Thomas Calmant
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,18 +28,22 @@
limitations under the License.
"""
+# Print is used in tests
+from __future__ import print_function
+
# Standard library
import logging
-import subprocess
-import unittest
import os
+import subprocess
import sys
+import unittest
# Prepare Python path to import javaobj
sys.path.insert(0, os.path.abspath(os.path.dirname(os.getcwd())))
# Local
-import javaobj
+import javaobj.v1 as javaobj
+from javaobj.utils import hexdump, java_data_fd
# ------------------------------------------------------------------------------
@@ -51,10 +55,11 @@
# ------------------------------------------------------------------------------
-class TestJavaobj(unittest.TestCase):
+class TestJavaobjV1(unittest.TestCase):
"""
- Full test suite for javaobj
+ Full test suite for javaobj V1 parser
"""
+
@classmethod
def setUpClass(cls):
"""
@@ -62,13 +67,14 @@ def setUpClass(cls):
data
"""
# Compute the java directory
- java_dir = os.path.join(os.path.dirname(__file__), 'java')
+ java_dir = os.path.join(os.path.dirname(__file__), "java")
- # Run Maven and go back to the working folder
- cwd = os.getcwd()
- os.chdir(java_dir)
- subprocess.call('mvn test', shell=True)
- os.chdir(cwd)
+ if not os.getenv("JAVAOBJ_NO_MAVEN"):
+ # Run Maven and go back to the working folder
+ cwd = os.getcwd()
+ os.chdir(java_dir)
+ subprocess.call("mvn test", shell=True)
+ os.chdir(cwd)
def read_file(self, filename, stream=False):
"""
@@ -78,9 +84,10 @@ def read_file(self, filename, stream=False):
:param stream: If True, return the file stream
:return: File content or stream
"""
- for subfolder in ('java', ''):
+ for subfolder in ("java", ""):
found_file = os.path.join(
- os.path.dirname(__file__), subfolder, filename)
+ os.path.dirname(__file__), subfolder, filename
+ )
if os.path.exists(found_file):
break
else:
@@ -89,26 +96,25 @@ def read_file(self, filename, stream=False):
if stream:
return open(found_file, "rb")
else:
- with open(found_file, 'rb') as filep:
+ with open(found_file, "rb") as filep:
return filep.read()
def _try_marshalling(self, original_stream, original_object):
"""
Tries to marshall an object and compares it to the original stream
"""
+ _logger.debug("Try Marshalling")
marshalled_stream = javaobj.dumps(original_object)
# Reloading the new dump allows to compare the decoding sequence
try:
javaobj.loads(marshalled_stream)
self.assertEqual(original_stream, marshalled_stream)
- except:
+ except Exception:
print("-" * 80)
print("=" * 30, "Original", "=" * 30)
- print(javaobj.JavaObjectUnmarshaller._create_hexdump(
- original_stream))
+ print(hexdump(original_stream))
print("*" * 30, "Marshalled", "*" * 30)
- print(javaobj.JavaObjectUnmarshaller._create_hexdump(
- marshalled_stream))
+ print(hexdump(marshalled_stream))
print("-" * 80)
raise
@@ -119,7 +125,7 @@ def test_char_rw(self):
jobj = self.read_file("testChar.ser")
pobj = javaobj.loads(jobj)
_logger.debug("Read char object: %s", pobj)
- self.assertEqual(pobj, '\x00C')
+ self.assertEqual(pobj, "\x00C")
self._try_marshalling(jobj, pobj)
def test_chars_rw(self):
@@ -135,6 +141,34 @@ def test_chars_rw(self):
self.assertEqual(pobj, expected)
self._try_marshalling(jobj, pobj)
+ def test_gzip_open(self):
+ """
+ Tests if the GZip auto-uncompress works
+ """
+ with java_data_fd(self.read_file("testChars.ser", stream=True)) as fd:
+ base = fd.read()
+
+ with java_data_fd(
+ self.read_file("testChars.ser.gz", stream=True)
+ ) as fd:
+ gzipped = fd.read()
+
+ self.assertEqual(
+ base, gzipped, "Uncompressed content doesn't match the original"
+ )
+
+ def test_chars_gzip(self):
+ """
+ Reads testChars.ser.gz
+ """
+ # Expected string as a UTF-16 string
+ expected = "python-javaobj".encode("utf-16-be").decode("latin1")
+
+ jobj = self.read_file("testChars.ser.gz")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read char objects: %s", pobj)
+ self.assertEqual(pobj, expected)
+
def test_double_rw(self):
"""
Reads testDouble.ser and checks the serialization process
@@ -143,7 +177,7 @@ def test_double_rw(self):
pobj = javaobj.loads(jobj)
_logger.debug("Read double object: %s", pobj)
- self.assertEqual(pobj, '\x7f\xef\xff\xff\xff\xff\xff\xff')
+ self.assertEqual(pobj, "\x7f\xef\xff\xff\xff\xff\xff\xff")
self._try_marshalling(jobj, pobj)
def test_bytes_rw(self):
@@ -154,10 +188,13 @@ def test_bytes_rw(self):
pobj = javaobj.loads(jobj)
_logger.debug("Read bytes: %s", pobj)
- self.assertEqual(pobj, 'HelloWorld')
+ self.assertEqual(pobj, "HelloWorld")
self._try_marshalling(jobj, pobj)
def test_class_with_byte_array_rw(self):
+ """
+ Tests handling of classes containing a Byte Array
+ """
jobj = self.read_file("testClassWithByteArray.ser")
pobj = javaobj.loads(jobj)
@@ -201,7 +238,7 @@ def test_fields(self):
pobj = javaobj.loads(jobj)
_logger.debug("Read object: %s", pobj)
- self.assertEqual(pobj.aField1, 'Gabba')
+ self.assertEqual(pobj.aField1, u"Gabba")
self.assertEqual(pobj.aField2, None)
classdesc = pobj.get_class()
@@ -224,7 +261,7 @@ def test_class(self):
jobj = self.read_file("testClass.ser")
pobj = javaobj.loads(jobj)
_logger.debug("Read object: %s", pobj)
- self.assertEqual(pobj.name, 'java.lang.String')
+ self.assertEqual(pobj.name, "java.lang.String")
self._try_marshalling(jobj, pobj)
# def test_swing_object(self):
@@ -241,6 +278,9 @@ def test_class(self):
# _logger.debug(".. Fields Types: %s", classdesc.fields_types)
def test_super(self):
+ """
+ Tests basic class inheritance handling
+ """
jobj = self.read_file("objSuper.ser")
pobj = javaobj.loads(jobj)
_logger.debug(pobj)
@@ -250,14 +290,17 @@ def test_super(self):
_logger.debug(classdesc.fields_names)
_logger.debug(classdesc.fields_types)
- self.assertEqual(pobj.childString, "Child!!")
+ self.assertEqual(pobj.childString, u"Child!!")
self.assertEqual(pobj.bool, True)
self.assertEqual(pobj.integer, -1)
- self.assertEqual(pobj.superString, "Super!!")
+ self.assertEqual(pobj.superString, u"Super!!")
self._try_marshalling(jobj, pobj)
def test_arrays(self):
+ """
+ Tests handling of Java arrays
+ """
jobj = self.read_file("objArrays.ser")
pobj = javaobj.loads(jobj)
_logger.debug(pobj)
@@ -280,14 +323,57 @@ def test_arrays(self):
self._try_marshalling(jobj, pobj)
+ def test_japan(self):
+ """
+ Tests the UTF encoding handling with Japanese characters
+ """
+ # testJapan.ser contains a string using wide characters:
+ # the name of the State of Japan (according to Wikipedia)
+ jobj = self.read_file("testJapan.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug(pobj)
+ # Compare with the name decoded from its UTF-8 byte sequence
+ self.assertEqual(
+ pobj, b"\xe6\x97\xa5\xe6\x9c\xac\xe5\x9b\xbd".decode("utf-8")
+ )
+ self._try_marshalling(jobj, pobj)
+
def test_char_array(self):
+ """
+ Tests the loading of a wide-char array
+ """
jobj = self.read_file("testCharArray.ser")
pobj = javaobj.loads(jobj)
_logger.debug(pobj)
- self.assertEqual(pobj, [u'\u0000', u'\ud800', u'\u0001', u'\udc00', u'\u0002', u'\uffff', u'\u0003'])
+ self.assertEqual(
+ pobj,
+ [
+ u"\u0000",
+ u"\ud800",
+ u"\u0001",
+ u"\udc00",
+ u"\u0002",
+ u"\uffff",
+ u"\u0003",
+ ],
+ )
self._try_marshalling(jobj, pobj)
+ def test_2d_array(self):
+ """
+ Tests the handling of a 2D array
+ """
+ jobj = self.read_file("test2DArray.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug(pobj)
+ self.assertEqual(
+ pobj, [[1, 2, 3], [4, 5, 6],],
+ )
+
def test_enums(self):
+ """
+ Tests the handling of "enum" types
+ """
jobj = self.read_file("objEnums.ser")
pobj = javaobj.loads(jobj)
_logger.debug(pobj)
@@ -299,14 +385,49 @@ def test_enums(self):
self.assertEqual(classdesc.name, "ClassWithEnum")
self.assertEqual(pobj.color.classdesc.name, "Color")
- self.assertEqual(pobj.color.constant, "GREEN")
+ self.assertEqual(pobj.color.constant, u"GREEN")
- for color, intended in zip(pobj.colors, ("GREEN", "BLUE", "RED")):
+ for color, intended in zip(pobj.colors, (u"GREEN", u"BLUE", u"RED")):
self.assertEqual(color.classdesc.name, "Color")
self.assertEqual(color.constant, intended)
# self._try_marshalling(jobj, pobj)
+ def test_sets(self):
+ """
+ Tests handling of HashSet, TreeSet and LinkedHashSet
+ """
+ for filename in (
+ "testHashSet.ser",
+ "testTreeSet.ser",
+ "testLinkedHashSet.ser",
+ ):
+ _logger.debug("Loading file: %s", filename)
+ jobj = self.read_file(filename)
+ pobj = javaobj.loads(jobj)
+ _logger.debug(pobj)
+ self.assertIsInstance(pobj, set)
+ self.assertSetEqual({i.value for i in pobj}, {1, 2, 42})
+
+ def test_times(self):
+ """
+ Tests the handling of java.time classes
+ """
+ jobj = self.read_file("testTime.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug(pobj)
+
+ # First one is a duration of 10s
+ duration = pobj[0]
+ self.assertEqual(duration.second, 10)
+
+ # Check types
+ self.assertIsInstance(pobj, javaobj.beans.JavaArray)
+ for obj in pobj:
+ self.assertIsInstance(
+ obj, javaobj.DefaultObjectTransformer.JavaTime
+ )
+
# def test_exception(self):
# jobj = self.read_file("objException.ser")
# pobj = javaobj.loads(jobj)
@@ -321,19 +442,23 @@ def test_enums(self):
# self.assertEqual(classdesc.name, "MyExceptionWhenDumping")
def test_sun_example(self):
- marshaller = javaobj.JavaObjectUnmarshaller(
- self.read_file("sunExample.ser", stream=True))
- pobj = marshaller.readObject()
+ marshaller = javaobj.JavaObjectUnmarshaller(
+ self.read_file("sunExample.ser", stream=True)
+ )
+ pobj = marshaller.readObject()
- self.assertEqual(pobj.value, 17)
- self.assertTrue(pobj.next)
+ self.assertEqual(pobj.value, 17)
+ self.assertTrue(pobj.next)
- pobj = marshaller.readObject()
+ pobj = marshaller.readObject()
- self.assertEqual(pobj.value, 19)
- self.assertFalse(pobj.next)
+ self.assertEqual(pobj.value, 19)
+ self.assertFalse(pobj.next)
def test_collections(self):
+ """
+ Tests the handling of ArrayList, LinkedList and HashMap
+ """
jobj = self.read_file("objCollections.ser")
pobj = javaobj.loads(jobj)
_logger.debug(pobj)
@@ -349,14 +474,45 @@ def test_collections(self):
# self._try_marshalling(jobj, pobj)
def test_jceks_issue_5(self):
+ """
+ Tests the handling of JCEKS issue #5
+ """
jobj = self.read_file("jceks_issue_5.ser")
pobj = javaobj.loads(jobj)
_logger.info(pobj)
# self._try_marshalling(jobj, pobj)
+ def test_qistoph_pr_27(self):
+ """
+ Tests support for Bool, Integer, Long classes (PR #27)
+ """
+ # Load the basic map
+ jobj = self.read_file("testBoolIntLong.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug(pobj)
+
+ # Basic checking
+ self.assertEqual(pobj[u"key1"], u"value1")
+ self.assertEqual(pobj[u"key2"], u"value2")
+ self.assertEqual(pobj[u"int"], 9)
+ self.assertEqual(pobj[u"int2"], 10)
+ self.assertEqual(pobj[u"bool"], True)
+ self.assertEqual(pobj[u"bool2"], True)
+
+ # Load the parent map
+ jobj2 = self.read_file("testBoolIntLong-2.ser")
+ pobj2 = javaobj.loads(jobj2)
+ _logger.debug(pobj2)
+
+ parent_map = pobj2[u"subMap"]
+ for key, value in pobj.items():
+ self.assertEqual(parent_map[key], value)
+
+
# ------------------------------------------------------------------------------
-if __name__ == '__main__':
+
+if __name__ == "__main__":
# Setup logging
logging.basicConfig(level=logging.INFO)
diff --git a/tests/test_v2.py b/tests/test_v2.py
new file mode 100644
index 0000000..301db9c
--- /dev/null
+++ b/tests/test_v2.py
@@ -0,0 +1,656 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Tests for javaobj
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+# Print is used in tests
+from __future__ import print_function
+
+# Standard library
+import logging
+import os
+import struct
+import subprocess
+import sys
+import unittest
+from io import BytesIO
+
+# Prepare Python path to import javaobj
+sys.path.insert(0, os.path.abspath(os.path.dirname(os.getcwd())))
+
+import javaobj.v2 as javaobj
+
+# Local
+from javaobj.utils import bytes_char, java_data_fd
+
+# ------------------------------------------------------------------------------
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+_logger = logging.getLogger("javaobj.tests")
+
+# ------------------------------------------------------------------------------
+
+# Custom writeObject parsing classes
+class CustomWriterInstance(javaobj.beans.JavaInstance):
+ def __init__(self):
+ javaobj.beans.JavaInstance.__init__(self)
+
+ def load_from_instance(self):
+ """
+ Updates the content of this instance
+ from its parsed fields and annotations
+ :return: True on success, False on error
+ """
+ if self.classdesc and self.classdesc in self.annotations:
+ fields = ["int_not_in_fields"] + self.classdesc.fields_names
+ raw_data = self.annotations[self.classdesc]
+ int_not_in_fields = struct.unpack(
+ ">i", BytesIO(raw_data[0].data).read(4)
+ )[0]
+ custom_obj = raw_data[1]
+ values = [int_not_in_fields, custom_obj]
+ self.field_data = dict(zip(fields, values))
+ return True
+
+ return False
+
+
+class RandomChildInstance(javaobj.beans.JavaInstance):
+ def load_from_instance(self):
+ """
+ Updates the content of this instance
+ from its parsed fields and annotations
+ :return: True on success, False on error
+ """
+ if self.classdesc and self.classdesc in self.field_data:
+ fields = self.classdesc.fields_names
+ values = [
+ self.field_data[self.classdesc][self.classdesc.fields[i]]
+ for i in range(len(fields))
+ ]
+ self.field_data = dict(zip(fields, values))
+ if (
+ self.classdesc.super_class
+ and self.classdesc.super_class in self.annotations
+ ):
+ super_class = self.annotations[self.classdesc.super_class][0]
+ self.annotations = dict(
+ zip(super_class.fields_names, super_class.field_data)
+ )
+ return True
+
+ return False
+
+
+class BaseTransformer(javaobj.transformers.ObjectTransformer):
+ """
+ Creates a JavaInstance object with custom loading methods for the
+ classes it can handle
+ """
+
+ def __init__(self, handled_classes=None):
+ self.instance = None
+ self.handled_classes = handled_classes or {}
+
+ def create_instance(self, classdesc):
+ """
+ Transforms a parsed Java object into a Python object
+
+ :param classdesc: The description of a Java class
+ :return: The Python form of the object, or the original JavaObject
+ """
+ if classdesc.name in self.handled_classes:
+ self.instance = self.handled_classes[classdesc.name]()
+ return self.instance
+
+ return None
+
+
+class RandomChildTransformer(BaseTransformer):
+ def __init__(self):
+ super(RandomChildTransformer, self).__init__(
+ {"RandomChild": RandomChildInstance}
+ )
+
+
+class CustomWriterTransformer(BaseTransformer):
+ def __init__(self):
+ super(CustomWriterTransformer, self).__init__(
+ {"CustomWriter": CustomWriterInstance}
+ )
+
+
+class JavaRandomTransformer(BaseTransformer):
+ def __init__(self):
+ super(JavaRandomTransformer, self).__init__()
+ self.name = "java.util.Random"
+ self.field_names = ["haveNextNextGaussian", "nextNextGaussian", "seed"]
+ self.field_types = [
+ javaobj.beans.FieldType.BOOLEAN,
+ javaobj.beans.FieldType.DOUBLE,
+ javaobj.beans.FieldType.LONG,
+ ]
+
+ def load_custom_writeObject(self, parser, reader, name):
+ if name != self.name:
+ return None
+
+ fields = []
+ values = []
+ for f_name, f_type in zip(self.field_names, self.field_types):
+ values.append(parser._read_field_value(f_type))
+ fields.append(javaobj.beans.JavaField(f_type, f_name))
+
+ class_desc = javaobj.beans.JavaClassDesc(
+ javaobj.beans.ClassDescType.NORMALCLASS
+ )
+ class_desc.name = self.name
+ class_desc.desc_flags = javaobj.beans.ClassDataType.EXTERNAL_CONTENTS
+ class_desc.fields = fields
+ class_desc.field_data = values
+ return class_desc
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestJavaobjV2(unittest.TestCase):
+ """
+ Full test suite for javaobj V2 Parser
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """
+ Calls Maven to compile & run Java classes that will generate serialized
+ data
+ """
+ # Compute the java directory
+ java_dir = os.path.join(os.path.dirname(__file__), "java")
+
+ if not os.getenv("JAVAOBJ_NO_MAVEN"):
+ # Run Maven and go back to the working folder
+ cwd = os.getcwd()
+ os.chdir(java_dir)
+ subprocess.call("mvn test", shell=True)
+ os.chdir(cwd)
+
+ def read_file(self, filename, stream=False):
+ """
+ Reads the content of the given file in binary mode
+
+ :param filename: Name of the file to read
+ :param stream: If True, return the file stream
+ :return: File content or stream
+ """
+ for subfolder in ("java", ""):
+ found_file = os.path.join(
+ os.path.dirname(__file__), subfolder, filename
+ )
+ if os.path.exists(found_file):
+ break
+ else:
+ raise IOError("File not found: {0}".format(filename))
+
+ if stream:
+ return open(found_file, "rb")
+ else:
+ with open(found_file, "rb") as filep:
+ return filep.read()
+
+ def test_char_rw(self):
+ """
+ Reads testChar.ser and checks the serialization process
+ """
+ jobj = self.read_file("testChar.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read char object: %s", pobj)
+ self.assertEqual(pobj, b"\x00C")
+
+ def test_chars_rw(self):
+ """
+ Reads testChars.ser and checks the serialization process
+ """
+ # Expected string as a UTF-16 string
+ expected = "python-javaobj".encode("utf-16-be")
+
+ jobj = self.read_file("testChars.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read char objects: %s", pobj)
+ self.assertEqual(pobj, expected)
+ self.assertEqual(pobj, expected.decode("latin1"))
+
+ def test_gzip_open(self):
+ """
+ Tests if the GZip auto-uncompress works
+ """
+ with java_data_fd(self.read_file("testChars.ser", stream=True)) as fd:
+ base = fd.read()
+
+ with java_data_fd(
+ self.read_file("testChars.ser.gz", stream=True)
+ ) as fd:
+ gzipped = fd.read()
+
+ self.assertEqual(
+ base, gzipped, "Uncompressed content doesn't match the original"
+ )
+
+ def test_chars_gzip(self):
+ """
+ Reads testChars.ser.gz
+ """
+ # Expected string as a UTF-16 string
+ expected = "python-javaobj".encode("utf-16-be")
+
+ jobj = self.read_file("testChars.ser.gz")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read char objects: %s", pobj)
+ self.assertEqual(pobj, expected)
+ self.assertEqual(pobj, expected.decode("latin1"))
+
+ def test_double_rw(self):
+ """
+ Reads testDouble.ser and checks the serialization process
+ """
+ jobj = self.read_file("testDouble.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read double object: %s", pobj)
+
+ self.assertEqual(pobj, b"\x7f\xef\xff\xff\xff\xff\xff\xff")
+
+ def test_bytes_rw(self):
+ """
+ Reads testBytes.ser and checks the serialization process
+ """
+ jobj = self.read_file("testBytes.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read bytes: %s", pobj)
+
+ self.assertEqual(pobj, b"HelloWorld")
+
+ def test_class_with_byte_array_rw(self):
+ """
+ Tests handling of classes containing a Byte Array
+ """
+ jobj = self.read_file("testClassWithByteArray.ser")
+ pobj = javaobj.loads(jobj)
+
+ # j8spencer (Google, LLC) 2018-01-16: It seems specific support for
+ # byte arrays was added, but is a little out-of-step with the other
+ # types in terms of style. This UT was broken, since the "myArray"
+ # member has the array stored as a tuple of ints (not a byte string)
+ # in member called '_data.' I've updated to pass the UTs.
+ self.assertEqual(pobj.myArray._data, (1, 3, 7, 11))
+
+ def test_boolean(self):
+ """
+ Reads testBoolean.ser and checks the serialization process
+ """
+ jobj = self.read_file("testBoolean.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read boolean object: %s", pobj)
+
+ self.assertEqual(pobj, bytes_char(0))
+
+ def test_byte(self):
+ """
+ Reads testByte.ser
+
+ The result from javaobj is a single-character string.
+ """
+ jobj = self.read_file("testByte.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read Byte: %r", pobj)
+
+ self.assertEqual(pobj, bytes_char(127))
+
+ def test_fields(self):
+ """
+ Reads a serialized object and checks its fields
+ """
+ jobj = self.read_file("test_readFields.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read object: %s", pobj)
+
+ self.assertEqual(pobj.aField1, u"Gabba")
+ self.assertEqual(pobj.aField2, None)
+
+ classdesc = pobj.get_class()
+ self.assertTrue(classdesc)
+ self.assertEqual(classdesc.serialVersionUID, 0x7F0941F5)
+ self.assertEqual(classdesc.name, "OneTest$SerializableTestHelper")
+
+ _logger.debug("Class..........: %s", classdesc)
+ _logger.debug(".. Flags.......: %s", classdesc.flags)
+ _logger.debug(".. Fields Names: %s", classdesc.fields_names)
+ _logger.debug(".. Fields Types: %s", classdesc.fields_types)
+
+ self.assertEqual(len(classdesc.fields_names), 3)
+
+ def test_class(self):
+ """
+ Reads the serialized String class
+ """
+ jobj = self.read_file("testClass.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug("Read object: %s", pobj)
+ self.assertEqual(pobj.name, "java.lang.String")
+
+ # def test_swing_object(self):
+ # """
+ # Reads a serialized Swing component
+ # """
+ # jobj = self.read_file("testSwingObject.ser")
+ # pobj = javaobj.loads(jobj)
+ # _logger.debug("Read object: %s", pobj)
+ #
+ # classdesc = pobj.get_class()
+ # _logger.debug("Class..........: %s", classdesc)
+ # _logger.debug(".. Fields Names: %s", classdesc.fields_names)
+ # _logger.debug(".. Fields Types: %s", classdesc.fields_types)
+
+ def test_super(self):
+ """
+ Tests basic class inheritance handling
+ """
+ jobj = self.read_file("objSuper.ser")
+ pobj = javaobj.loads(jobj)
+ _logger.debug(pobj)
+
+ classdesc = pobj.get_class()
+ _logger.debug(classdesc)
+ _logger.debug(classdesc.fields_names)
+ _logger.debug(classdesc.fields_types)
+
+ self.assertEqual(pobj.childString, u"Child!!")
+ self.assertEqual(pobj.bool, True)
+ self.assertEqual(pobj.integer, -1)
+ self.assertEqual(pobj.superString, u"Super!!")
+
+    def test_arrays(self):
+        """
+        Loads an object holding Java arrays and logs their content
+        """
+        serialized = self.read_file("objArrays.ser")
+        loaded = javaobj.loads(serialized)
+        _logger.debug(loaded)
+
+        description = loaded.get_class()
+        _logger.debug(description)
+        _logger.debug(description.fields_names)
+        _logger.debug(description.fields_types)
+
+        # Java-side declarations of the fields read below:
+        # public String[] stringArr = {"1", "2", "3"};
+        # public int[] integerArr = {1,2,3};
+        # public boolean[] boolArr = {true, false, true};
+        # public TestConcrete[] concreteArr = {new TestConcrete(),
+        #                                      new TestConcrete()};
+
+        # No assertion here: reading a missing field would raise an error
+        for field in ("stringArr", "integerArr", "boolArr", "concreteArr"):
+            _logger.debug(getattr(loaded, field))
+
+    def test_japan(self):
+        """
+        Tests the UTF decoding of a string made of Japanese characters
+        """
+        # testJapan.ser contains a string using wide characters: the name
+        # of the state of Japan (according to Wikipedia)
+        expected = b"\xe6\x97\xa5\xe6\x9c\xac\xe5\x9b\xbd".decode("utf-8")
+
+        loaded = javaobj.loads(self.read_file("testJapan.ser"))
+        _logger.debug(loaded)
+
+        # The loaded value must match the text decoded from UTF-8 bytes
+        self.assertEqual(loaded, expected)
+
+    def test_char_array(self):
+        """
+        Checks the content loaded from a serialized wide-char array
+        """
+        # U+D800 and U+DC00 are lone surrogates in the expected content
+        expected = [
+            u"\u0000",
+            u"\ud800",
+            u"\u0001",
+            u"\udc00",
+            u"\u0002",
+            u"\uffff",
+            u"\u0003",
+        ]
+
+        serialized = self.read_file("testCharArray.ser")
+        loaded = javaobj.loads(serialized)
+        _logger.debug(loaded)
+        self.assertEqual(loaded, expected)
+
+    def test_2d_array(self):
+        """
+        Compares a loaded 2D array against a list of lists
+        """
+        expected = [[1, 2, 3], [4, 5, 6]]
+        serialized = self.read_file("test2DArray.ser")
+        loaded = javaobj.loads(serialized)
+        _logger.debug(loaded)
+
+        self.assertEqual(loaded, expected)
+
+    def test_class_array(self):
+        """
+        Checks the class names found in an array of Class objects
+        """
+        serialized = self.read_file("testClassArray.ser")
+        classes = javaobj.loads(serialized)
+        _logger.debug(classes)
+        self.assertEqual(classes[0].name, "java.lang.Integer")
+        self.assertEqual(classes[1].name, "java.io.ObjectOutputStream")
+        self.assertEqual(classes[2].name, "java.lang.Exception")
+
+    def test_enums(self):
+        """
+        Tests the handling of "enum" types
+        """
+        pobj = javaobj.loads(self.read_file("objEnums.ser"))
+        classdesc = pobj.get_class()
+        _logger.debug("classdesc: %s", classdesc)
+        _logger.debug("fields_names: %s", classdesc.fields_names)
+        _logger.debug("fields_types: %s", classdesc.fields_types)
+
+        self.assertEqual(classdesc.name, "ClassWithEnum")
+        self.assertEqual(pobj.color.classdesc.name, "Color")
+        self.assertEqual(pobj.color.constant, u"GREEN")
+
+        # zip() stops at the shortest input: check the array size first
+        self.assertEqual(len(pobj.colors), 3)
+        for color, intended in zip(pobj.colors, (u"GREEN", u"BLUE", u"RED")):
+            _logger.debug("color: %s - %s", color, type(color))
+            self.assertEqual(color.classdesc.name, "Color")
+            self.assertEqual(color.constant, intended)
+
+    def test_sets(self):
+        """
+        Tests handling of HashSet, TreeSet and LinkedHashSet
+        """
+        for ser_name in (
+            "testHashSet.ser",
+            "testTreeSet.ser",
+            "testLinkedHashSet.ser",
+        ):
+            _logger.debug("Loading file: %s", ser_name)
+            loaded = javaobj.loads(self.read_file(ser_name))
+            _logger.debug(loaded)
+            self.assertIsInstance(loaded, set)
+            values = {item.value for item in loaded}
+            self.assertSetEqual(values, {1, 2, 42})
+
+    def test_times(self):
+        """
+        Checks the objects loaded from serialized java.time instances
+        """
+        loaded = javaobj.loads(self.read_file("testTime.ser"))
+        _logger.debug(loaded)
+
+        # The first entry is a duration of 10s
+        first_entry = loaded[0]
+        self.assertEqual(first_entry.second, 10)
+
+        # The container must be a Java array...
+        self.assertIsInstance(loaded, javaobj.beans.JavaArray)
+        # ... and each of its entries must be a JavaTime
+        for entry in loaded:
+            self.assertIsInstance(entry, javaobj.transformers.JavaTime)
+
+ # def test_exception(self):
+ # jobj = self.read_file("objException.ser")
+ # pobj = javaobj.loads(jobj)
+ # _logger.debug(pobj)
+ #
+ # classdesc = pobj.get_class()
+ # _logger.debug(classdesc)
+ # _logger.debug(classdesc.fields_names)
+ # _logger.debug(classdesc.fields_types)
+ #
+ # # TODO: add some tests
+ # self.assertEqual(classdesc.name, "MyExceptionWhenDumping")
+
+    def test_sun_example(self):
+        """
+        Checks the first two objects loaded from the sunExample.ser stream
+        """
+        entries = javaobj.load(self.read_file("sunExample.ser", stream=True))
+        self.assertEqual(entries[0].value, 17)
+        self.assertTrue(entries[0].next)
+
+        self.assertEqual(entries[1].value, 19)
+        self.assertFalse(entries[1].next)
+
+    def test_collections(self):
+        """
+        Tests the handling of ArrayList, LinkedList and HashMap
+        """
+        jobj = self.read_file("objCollections.ser")
+        pobj = javaobj.loads(jobj)
+        _logger.debug(pobj)
+
+        _logger.debug("arrayList: %s", pobj.arrayList)
+        self.assertIsInstance(pobj.arrayList, list)
+        _logger.debug("hashMap: %s", pobj.hashMap)
+        self.assertIsInstance(pobj.hashMap, dict)
+        _logger.debug("linkedList: %s", pobj.linkedList)
+        self.assertIsInstance(pobj.linkedList, list)
+
+        # FIXME: referencing problems with the collection class
+
+    def test_jceks_issue_5(self):
+        """
+        Checks that the sample file of JCEKS issue #5 can be loaded
+        """
+        serialized = self.read_file("jceks_issue_5.ser")
+        loaded = javaobj.loads(serialized)
+        _logger.info(loaded)
+
+    def test_qistoph_pr_27(self):
+        """
+        Tests the support of Bool, Integer and Long classes (PR #27)
+        """
+        # First file: a flat map
+        serialized = self.read_file("testBoolIntLong.ser")
+        flat_map = javaobj.loads(serialized)
+        _logger.debug(flat_map)
+
+        # Its entries must compare equal to plain Python values
+        self.assertEqual(flat_map[u"key1"], u"value1")
+        self.assertEqual(flat_map[u"key2"], u"value2")
+        self.assertEqual(flat_map[u"int"], 9)
+        self.assertEqual(flat_map[u"int2"], 10)
+        self.assertEqual(flat_map[u"bool"], True)
+        self.assertEqual(flat_map[u"bool2"], True)
+
+        # Second file: a map holding another map under the "subMap" key
+        outer_map = javaobj.loads(self.read_file("testBoolIntLong-2.ser"))
+        _logger.debug(outer_map)
+
+        # Every entry of the flat map must be found in the sub map
+        sub_map = outer_map[u"subMap"]
+        for key, value in flat_map.items():
+            self.assertEqual(sub_map[key], value)
+
+    def test_writeObject(self):
+        """
+        Tests support for custom writeObject (PR #38)
+        """
+
+        ser = self.read_file("testCustomWriteObject.ser")
+        transformers = [
+            CustomWriterTransformer(),
+            RandomChildTransformer(),
+            JavaRandomTransformer(),
+        ]
+        pobj = javaobj.loads(ser, *transformers)
+
+        # assertIsInstance reports the actual type when the check fails
+        self.assertIsInstance(pobj, CustomWriterInstance)
+        self.assertIsInstance(
+            pobj.field_data["custom_obj"], RandomChildInstance
+        )
+
+        parent_data = pobj.field_data
+        child_data = parent_data["custom_obj"].field_data
+        super_data = parent_data["custom_obj"].annotations
+        expected = {
+            "int_not_in_fields": 0,
+            "custom_obj": {
+                "field_data": {"doub": 4.5, "num": 1},
+                "annotations": {
+                    "haveNextNextGaussian": False,
+                    "nextNextGaussian": 0.0,
+                    "seed": 25214903879,
+                },
+            },
+        }
+
+        self.assertEqual(
+            expected["int_not_in_fields"], parent_data["int_not_in_fields"]
+        )
+        self.assertEqual(expected["custom_obj"]["field_data"], child_data)
+        self.assertEqual(expected["custom_obj"]["annotations"], super_data)
+
+
+# ------------------------------------------------------------------------------
+
+
+if __name__ == "__main__":
+ # Script entry point: set up logging at INFO level
+ logging.basicConfig(level=logging.INFO)
+
+ # Run the tests of this module with the unittest runner
+ unittest.main()