wip: har and mitmproxy capture readers

wip/har-support
alufers 3 years ago
parent b5a0fb5b88
commit 6cf8578d06

@ -0,0 +1,38 @@
import os
from tokenize import Number
import json_stream
def har_archive_heuristic(file_path: str) -> int:
    """Score how likely the file at *file_path* is to be a HAR archive.

    The score is a sum of independent hints: the ``.har`` extension, whether
    the first 2048 bytes look like printable text, whether the file starts
    with ``{``, and whether a few marker strings appear in that prefix.
    A higher score means a more likely match.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The accumulated integer score (0 when no hint matched).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    val = 0
    # The conventional file extension is a cheap first hint.
    if file_path.endswith('.har'):
        val += 15
    # Only the first 2048 bytes are sniffed; the file is never read in full.
    with open(file_path, 'rb') as f:
        data = f.read(2048)
    # Bytes that are not valid UTF-8 are dropped before the printable check.
    # NOTE(review): str.isprintable() is False for newlines and tabs, so a
    # pretty-printed HAR does not earn this bonus -- confirm that is intended.
    if data.decode('utf-8', 'ignore').isprintable():
        val += 40
    # Slice instead of indexing: data[0] is an int (never equal to '{') and
    # raises IndexError when the file is empty.
    if data[:1] == b'{':
        val += 15
    # Marker strings looked for in the sniffed prefix.
    if b'"WebInspector"' in data:
        val += 15
    if b'"entries"' in data:
        val += 15
    if b'"version"' in data:
        val += 15
    return val
class HarCaptureReader:
    """Streams the entries of a HAR file as ``HarFlowWrapper`` objects.

    The file is parsed with ``json_stream`` and entries are yielded one at a
    time from ``data['log']['entries']``.
    """

    def __init__(self, file_path: str):
        """Remember the path of the HAR file; nothing is opened yet."""
        self.file_path = file_path

    # The return annotation is quoted on purpose: an unquoted annotation is
    # evaluated when the class body runs, and neither ``Iterator`` nor
    # ``HarFlowWrapper`` is imported or defined in the visible module lines,
    # which would make the module fail with NameError on import.
    # NOTE(review): ``HarFlowWrapper`` still has to be defined or imported
    # before this generator is iterated.
    def captured_requests(self) -> "Iterator[HarFlowWrapper]":
        """Yield one ``HarFlowWrapper`` per entry in the HAR log.

        Each streamed entry is converted with ``.persistent()`` before it is
        handed to the wrapper.

        Raises:
            OSError: If the file cannot be opened.
        """
        # The HAR spec requires UTF-8 and lets writers emit a BOM, so decode
        # explicitly instead of relying on the platform default encoding.
        with open(self.file_path, 'r', encoding='utf-8-sig') as f:
            data = json_stream.load(f)
            for entry in data['log']['entries']:
                yield HarFlowWrapper(entry.persistent())

@ -0,0 +1,78 @@
from tokenize import Number
from typing import Iterator
from mitmproxy import io as iom, http
from mitmproxy.exceptions import FlowReadException
import os
def mitmproxy_dump_file_huristic(file_path: str) -> int:
    """Score how likely the file at *file_path* is to be a mitmproxy dump.

    The score is a sum of independent hints: substrings of the path, whether
    the first 2048 bytes contain non-printable data, whether the file starts
    with a digit, and whether a few marker strings appear in that prefix.
    A higher score means a more likely match.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The accumulated integer score (0 when no hint matched).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    val = 0
    # Weak hints taken from the path itself.
    if 'flow' in file_path:
        val += 1
    if 'mitmproxy' in file_path:
        val += 1
    # Only the first 2048 bytes are sniffed; the file is never read in full.
    with open(file_path, 'rb') as f:
        data = f.read(2048)
    # Non-printable content after a lenient UTF-8 decode is the strongest hint.
    # Note that str.isprintable() is also False for newlines and tabs.
    if not data.decode('utf-8', 'ignore').isprintable():
        val += 50
    # Slice instead of indexing: data[0] is an int, which has no isdigit(),
    # and indexing raises IndexError when the file is empty.
    # presumably the dump begins with a numeric length prefix -- TODO confirm
    if data[:1].isdigit():
        val += 5
    # Marker strings looked for in the sniffed prefix.
    if b'status_code' in data:
        val += 5
    if b'regular' in data:
        val += 10
    return val
class MitmproxyFlowWrapper:
    """Thin accessor layer over a mitmproxy HTTP flow.

    Every getter reads straight from ``flow.request`` or ``flow.response``.
    The response getters raise ``AttributeError`` if ``flow.response`` is
    ``None``, because they dereference it without a check.
    """

    # Annotation is quoted so defining the class does not evaluate ``http``.
    def __init__(self, flow: "http.HTTPFlow"):
        """Store the wrapped flow."""
        self.flow = flow

    @staticmethod
    def _headers_to_dict(headers):
        """Group ``headers.items(multi=True)`` pairs into ``{name: [values]}``.

        Values of a repeated header name are kept in iteration order.
        """
        grouped = {}
        for name, value in headers.items(multi=True):
            # Start a list the first time a name is seen, then append.
            grouped.setdefault(name, []).append(value)
        return grouped

    def get_url(self):
        """Return ``flow.request.url``."""
        return self.flow.request.url

    def get_method(self):
        """Return ``flow.request.method``."""
        return self.flow.request.method

    def get_request_headers(self):
        """Return the request headers as a dict of name -> list of values."""
        return self._headers_to_dict(self.flow.request.headers)

    def get_request_body(self):
        """Return ``flow.request.content``."""
        return self.flow.request.content

    def get_response_status_code(self):
        """Return ``flow.response.status_code``."""
        return self.flow.response.status_code

    def get_response_headers(self):
        """Return the response headers as a dict of name -> list of values."""
        return self._headers_to_dict(self.flow.response.headers)

    def get_response_body(self):
        """Return ``flow.response.content``."""
        return self.flow.response.content
class MitmproxyCaptureReader:
    """Iterates over the HTTP flows stored in a mitmproxy dump file.

    An optional ``progress_callback`` is called for each flow read, with the
    current file offset divided by the total file size.
    """

    def __init__(self, file_path, progress_callback=None):
        """Keep the dump path and the optional progress callback."""
        self.progress_callback = progress_callback
        self.file_path = file_path

    def captured_requests(self) -> Iterator[MitmproxyFlowWrapper]:
        """Yield a ``MitmproxyFlowWrapper`` for every ``http.HTTPFlow`` read.

        Flows of any other type are skipped. A ``FlowReadException`` stops
        the iteration and is reported with ``print`` rather than re-raised.
        """
        with open(self.file_path, 'rb') as dump:
            total_bytes = os.path.getsize(self.file_path)
            reader = iom.FlowReader(dump)
            try:
                for flow in reader.stream():
                    # Report progress for every flow, including skipped ones.
                    if self.progress_callback:
                        fraction = dump.tell() / total_bytes
                        self.progress_callback(fraction)
                    if not isinstance(flow, http.HTTPFlow):
                        continue
                    yield MitmproxyFlowWrapper(flow)
            except FlowReadException as err:
                print(f"Flow file corrupted: {err}")

88
poetry.lock generated

@ -6,6 +6,9 @@ category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
[package.extras]
tests = ["pytest", "pytest-asyncio", "mypy (>=0.800)"]
@ -54,6 +57,7 @@ python-versions = ">=3.7"
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
[[package]]
name = "colorama"
@ -82,6 +86,14 @@ sdist = ["setuptools_rust (>=0.11.4)"]
ssh = ["bcrypt (>=3.1.5)"]
test = ["pytest (>=6.2.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
[[package]]
name = "dataclasses"
version = "0.8"
description = "A backport of the dataclasses module for Python 3.6"
category = "main"
optional = false
python-versions = ">=3.6, <3.7"
[[package]]
name = "flask"
version = "2.0.3"
@ -108,6 +120,10 @@ category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
dataclasses = {version = "*", markers = "python_version < \"3.7\""}
typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
[[package]]
name = "h2"
version = "4.1.0"
@ -136,6 +152,23 @@ category = "main"
optional = false
python-versions = ">=3.6.1"
[[package]]
name = "importlib-metadata"
version = "4.11.3"
description = "Read metadata from Python packages"
category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
zipp = ">=0.5"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
perf = ["ipython"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"]
[[package]]
name = "itsdangerous"
version = "2.1.2"
@ -158,6 +191,17 @@ MarkupSafe = ">=2.0"
[package.extras]
i18n = ["Babel (>=2.7)"]
[[package]]
name = "json-stream"
version = "1.3.0"
description = "Streaming JSON decoder"
category = "main"
optional = false
python-versions = "<4,>=3.5"
[package.extras]
requests = ["requests"]
[[package]]
name = "kaitaistruct"
version = "0.9"
@ -362,6 +406,14 @@ category = "main"
optional = false
python-versions = ">= 3.5"
[[package]]
name = "typing-extensions"
version = "4.1.1"
description = "Backported and Experimental Type Hints for Python 3.6+"
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "urwid"
version = "2.1.2"
@ -392,6 +444,18 @@ python-versions = ">=3.7.0"
[package.dependencies]
h11 = ">=0.9.0,<1"
[[package]]
name = "zipp"
version = "3.8.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
category = "main"
optional = false
python-versions = ">=3.7"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
[[package]]
name = "zstandard"
version = "0.17.0"
@ -408,8 +472,8 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "6f13a784668fbbf96c0c732392cf912f243163fa01a7dd6ea4a7938d1ff84d32"
python-versions = "^3.5"
content-hash = "8d0c85f2b28deebc09b485b5b606fbb73a1d3129453125a9d483798dc9a924a8"
[metadata.files]
asgiref = [
@ -569,6 +633,10 @@ cryptography = [
{file = "cryptography-36.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e167b6b710c7f7bc54e67ef593f8731e1f45aa35f8a8a7b72d6e42ec76afd4b3"},
{file = "cryptography-36.0.2.tar.gz", hash = "sha256:70f8f4f7bb2ac9f340655cbac89d68c527af5bb4387522a8413e841e3e6628c9"},
]
dataclasses = [
{file = "dataclasses-0.8-py3-none-any.whl", hash = "sha256:0201d89fa866f68c8ebd9d08ee6ff50c0b255f8ec63a71c16fda7af82bb887bf"},
{file = "dataclasses-0.8.tar.gz", hash = "sha256:8479067f342acf957dc82ec415d355ab5edb7e7646b90dc6e2fd1d96ad084c97"},
]
flask = [
{file = "Flask-2.0.3-py3-none-any.whl", hash = "sha256:59da8a3170004800a2837844bfa84d49b022550616070f7cb1a659682b2e7c9f"},
{file = "Flask-2.0.3.tar.gz", hash = "sha256:e1120c228ca2f553b470df4a5fa927ab66258467526069981b3eb0a91902687d"},
@ -589,6 +657,10 @@ hyperframe = [
{file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
{file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
]
importlib-metadata = [
{file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"},
{file = "importlib_metadata-4.11.3.tar.gz", hash = "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"},
]
itsdangerous = [
{file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"},
{file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"},
@ -597,6 +669,10 @@ jinja2 = [
{file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
{file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
]
json-stream = [
{file = "json-stream-1.3.0.tar.gz", hash = "sha256:2790c16bccde6a77640c85e911a44de9c9de648206d9a4474b9ce398cfc7d14c"},
{file = "json_stream-1.3.0-py3-none-any.whl", hash = "sha256:bbb8bc29eed00d53d245224c893831c995bedc18b15fea08ed00c0cb485913ef"},
]
kaitaistruct = [
{file = "kaitaistruct-0.9.tar.gz", hash = "sha256:3d5845817ec8a4d5504379cc11bd570b038850ee49c4580bc0998c8fb1d327ad"},
]
@ -836,6 +912,10 @@ tornado = [
{file = "tornado-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4"},
{file = "tornado-6.1.tar.gz", hash = "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791"},
]
typing-extensions = [
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
]
urwid = [
{file = "urwid-2.1.2.tar.gz", hash = "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae"},
]
@ -847,6 +927,10 @@ wsproto = [
{file = "wsproto-1.1.0-py3-none-any.whl", hash = "sha256:2218cb57952d90b9fca325c0dcfb08c3bda93e8fd8070b0a17f048e2e47a521b"},
{file = "wsproto-1.1.0.tar.gz", hash = "sha256:a2e56bfd5c7cd83c1369d83b5feccd6d37798b74872866e62616e0ecf111bda8"},
]
zipp = [
{file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
{file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
]
zstandard = [
{file = "zstandard-0.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1991cdf2e81e643b53fb8d272931d2bdf5f4e70d56a457e1ef95bde147ae627"},
{file = "zstandard-0.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4768449d8d1b0785309ace288e017cc5fa42e11a52bf08c90d9c3eb3a7a73cc6"},

@ -9,6 +9,7 @@ readme = "README.md"
python = "^3.5"
mitmproxy = "^8.0.0"
"ruamel.yaml" = "^0.17.21"
json-stream = "^1.3.0"
[tool.poetry.dev-dependencies]

Loading…
Cancel
Save