mirror of
https://github.com/mitmproxy/mitmproxy.git
synced 2025-02-06 05:18:39 +00:00
c69239bb90
mitmproxy previously used a homegrown logging mechanism based around `mitmproxy.ctx.log` and the `add_log` hook. This worked well for everything we control, but does not work outside the mitmproxy universe. For now we have simply ignored logging in e.g. tornado or h2, but with the upcoming introduction of mitmproxy_wireguard we now have a dependency on some Rust/PyO3 code for which we definitely want logs, but which also cannot easily be changed to use our homegrown logging (PyO3 does the heavy lifting to add interoperability with stdlib logging). Long story short, we want to introduce a log handler for stdlib logging. Now there are two ways how such a handler could operate: 1. We could build a handler that forwards all stdlib log events into our homegrown mechanism. 2. We embrace stdlib's logging as the correct way to do things, and get rid of our homegrown stuff. This PR follows the second approach by removing the `add_log` hook and rewriting the `TermLog` and `EventStore` addons to listen for stdlib log records. This means that all `mitmproxy.ctx.log.info` events are now simply `logging.info` etc. One upside of this approach is that many parts of the codebase now don't depend on the existence of `mitmproxy.ctx` and we can use off-the-shelf things like pytest's `caplog`. We can also now better colorize log output and/or add timestamps.
123 lines
4.3 KiB
Python
123 lines
4.3 KiB
Python
"""
|
|
Save streamed requests and responses
|
|
|
|
If the option 'save_streamed_data' is set to a format string then
|
|
streamed requests and responses are written to individual files with a name
|
|
derived from the string. Apart from python strftime() formating (using the
|
|
request start time) the following codes can also be used:
|
|
- %+T: The time stamp of the request with microseconds
|
|
- %+D: 'req' or 'rsp' indicating the direction of the data
|
|
- %+I: The client connection ID
|
|
- %+C: The client IP address
|
|
A good starting point for a template could be '~/streamed_files/%+D:%+T:%+I',
|
|
a more complex example is '~/streamed_files/%+C/%Y-%m-%d%/%+D:%+T:%+I'.
|
|
The client connection ID combined with the request time stamp should be unique
|
|
for associating a file with its corresponding flow in the stream saved with
|
|
'--save-stream-file'.
|
|
|
|
This addon is not compatible with addons that use the same mechanism to
|
|
capture streamed data, http-stream-modify.py for instance.
|
|
"""
|
|
import logging
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from mitmproxy import ctx
|
|
|
|
|
|
class StreamSaver:
|
|
TAG = "save_streamed_data: "
|
|
|
|
def __init__(self, flow, direction):
|
|
self.flow = flow
|
|
self.direction = direction
|
|
self.fh = None
|
|
self.path = None
|
|
|
|
def done(self):
|
|
if self.fh:
|
|
self.fh.close()
|
|
self.fh = None
|
|
# Make sure we have no circular references
|
|
self.flow = None
|
|
|
|
def __call__(self, data):
|
|
# End of stream?
|
|
if len(data) == 0:
|
|
self.done()
|
|
return data
|
|
|
|
# Just in case the option changes while a stream is in flight
|
|
if not ctx.options.save_streamed_data:
|
|
return data
|
|
|
|
# This is a safeguard but should not be needed
|
|
if not self.flow or not self.flow.request:
|
|
return data
|
|
|
|
if not self.fh:
|
|
self.path = datetime.fromtimestamp(self.flow.request.timestamp_start).strftime(
|
|
ctx.options.save_streamed_data)
|
|
self.path = self.path.replace('%+T', str(self.flow.request.timestamp_start))
|
|
self.path = self.path.replace('%+I', str(self.flow.client_conn.id))
|
|
self.path = self.path.replace('%+D', self.direction)
|
|
self.path = self.path.replace('%+C', self.flow.client_conn.address[0])
|
|
self.path = os.path.expanduser(self.path)
|
|
|
|
parent = Path(self.path).parent
|
|
try:
|
|
if not parent.exists():
|
|
parent.mkdir(parents=True, exist_ok=True)
|
|
except OSError:
|
|
logging.error(f"{self.TAG}Failed to create directory: {parent}")
|
|
|
|
try:
|
|
self.fh = open(self.path, "wb", buffering=0)
|
|
except OSError:
|
|
logging.error(f"{self.TAG}Failed to open for writing: {self.path}")
|
|
|
|
if self.fh:
|
|
try:
|
|
self.fh.write(data)
|
|
except OSError:
|
|
logging.error(f"{self.TAG}Failed to write to: {self.path}")
|
|
|
|
return data
|
|
|
|
|
|
def load(loader):
|
|
loader.add_option(
|
|
"save_streamed_data", Optional[str], None,
|
|
"Format string for saving streamed data to files. If set each streamed request or response is written "
|
|
"to a file with a name derived from the string. In addition to formating supported by python "
|
|
"strftime() (using the request start time) the code '%+T' is replaced with the time stamp of the request, "
|
|
"'%+D' by 'req' or 'rsp' depending on the direction of the data, '%+C' by the client IP addresses and "
|
|
"'%+I' by the client connection ID."
|
|
)
|
|
|
|
|
|
def requestheaders(flow):
|
|
if ctx.options.save_streamed_data and flow.request.stream:
|
|
flow.request.stream = StreamSaver(flow, 'req')
|
|
|
|
|
|
def responseheaders(flow):
|
|
if isinstance(flow.request.stream, StreamSaver):
|
|
flow.request.stream.done()
|
|
if ctx.options.save_streamed_data and flow.response.stream:
|
|
flow.response.stream = StreamSaver(flow, 'rsp')
|
|
|
|
|
|
def response(flow):
|
|
if isinstance(flow.response.stream, StreamSaver):
|
|
flow.response.stream.done()
|
|
|
|
|
|
def error(flow):
|
|
if flow.request and isinstance(flow.request.stream, StreamSaver):
|
|
flow.request.stream.done()
|
|
if flow.response and isinstance(flow.response.stream, StreamSaver):
|
|
flow.response.stream.done()
|