diff --git a/README.md b/README.md
new file mode 100644
index 0000000..86aaf53
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# pythonparser
+
+Pythonparser is a Python tool that produces a GumTree-compatible Python AST. Under the hood, it uses asttokens and jsontree.
+This version provides two scripts: `pythonparser.py` for Python 2 and `pythonparser3.py` for Python 3.
+
+## Installation
+
+Clone the repository and run `pip install -r requirements.txt`.
+
+## Usage
+On Windows:
+* Set the `PYTHONPATH` environment variable to the directory where pip installs packages.
+* Change `PythonTreeGenerator.java` to launch the parser with the `python` command instead of relying on `PATH`.
+
diff --git a/pythonparser b/pythonparser.py
old mode 100755
new mode 100644
similarity index 88%
rename from pythonparser
rename to pythonparser.py
index ef7cdf7..da821a5
--- a/pythonparser
+++ b/pythonparser.py
@@ -21,10 +21,9 @@ def parse_file(filename):
json_tree = []
def localize(node, json_node):
- json_node['line_no'] = str(node.first_token.start[0])
- json_node['col'] = str(node.first_token.start[1])
- json_node['end_line_no'] = str(node.last_token.end[0])
- json_node['end_col'] = str(node.last_token.end[1])
+ json_pos_and_length = extract_pos_and_length(node, node)
+ json_node['pos'] = str(json_pos_and_length[0])
+ json_node['length'] = str(json_pos_and_length[1])
def gen_identifier(identifier, node_type = 'identifier', node=None):
pos = len(json_tree)
@@ -46,6 +45,7 @@ def traverse_list(l, node_type = 'list', node = None):
children.append(traverse(item))
if (len(children) != 0):
json_node['children'] = children
+
return pos
def traverse(node):
@@ -54,7 +54,6 @@ def traverse(node):
json_tree.append(json_node)
json_node['type'] = type(node).__name__
localize(node, json_node)
-
children = []
if isinstance(node, ast.Name):
json_node['value'] = node.id
@@ -140,26 +139,38 @@ def traverse(node):
if (len(children) != 0):
json_node['children'] = children
+
return pos
+ def extract_pos_and_length(node, other_node):
+ try:
+ return [node.startpos, other_node.endpos - node.startpos]
+ except:
+ try:
+ return [node.first_token.startpos, other_node.last_token.endpos - node.first_token.startpos]
+ except:
+ pass
+ return [-1, -1]
+
traverse(tree)
return json_tree
-
def json2xml(tree):
lines = []
def convert_node(i, indent_level=0):
node = tree[i]
- line = "\t" * indent_level + "<{}".format(node['type'])
- for key in ['value', 'lineno', 'col', 'end_line_no', 'end_col']:
+ line = "\t" * indent_level + ""
lines.append(line)
if "children" in node:
for child in node["children"]:
convert_node(int(child), indent_level + 1)
- lines.append("\t" * indent_level + "" + node["type"] + ">")
+ lines.append("\t" * indent_level + "")
return lines
return "\n".join(convert_node(0))
diff --git a/pythonparser3 b/pythonparser3.py
old mode 100755
new mode 100644
similarity index 89%
rename from pythonparser3
rename to pythonparser3.py
index 100d487..6ab6a9d
--- a/pythonparser3
+++ b/pythonparser3.py
@@ -20,10 +20,9 @@ def parse_file(filename):
json_tree = []
def localize(node, json_node):
- json_node['lineno'] = str(node.first_token.start[0])
- json_node['col'] = str(node.first_token.start[1])
- json_node['end_line_no'] = str(node.last_token.end[0])
- json_node['end_col'] = str(node.last_token.end[1])
+ json_pos_and_length = extract_pos_and_length(node, node)
+ json_node['pos'] = str(json_pos_and_length[0])
+ json_node['length'] = str(json_pos_and_length[1])
def gen_identifier(identifier, node_type = 'identifier', node=None):
pos = len(json_tree)
@@ -149,25 +148,36 @@ def traverse(node):
if (len(children) != 0):
json_node['children'] = children
return pos
+
+ def extract_pos_and_length(node, other_node):
+ try:
+ return [node.startpos, other_node.endpos - node.startpos]
+ except:
+ try:
+ return [node.first_token.startpos, other_node.last_token.endpos - node.first_token.startpos]
+ except:
+ pass
+ return [-1, -1]
traverse(tree)
return json_tree
-
def json2xml(tree):
lines = []
def convert_node(i, indent_level=0):
node = tree[i]
- line = "\t" * indent_level + "<{}".format(node['type'])
- for key in ['value', 'lineno', 'col', 'end_line_no', 'end_col']:
+ line = "\t" * indent_level + ""
lines.append(line)
if "children" in node:
for child in node["children"]:
convert_node(int(child), indent_level + 1)
- lines.append("\t" * indent_level + "" + node["type"] + ">")
+ lines.append("\t" * indent_level + "")
return lines
return "\n".join(convert_node(0))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..de9c16b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+jsontree
+asttokens
\ No newline at end of file