Source code for wpull.protocol.http.request

# encoding=utf-8
'''HTTP conversation objects.'''
import copy
import re

from wpull.protocol.abstract.request import SerializableMixin, DictableMixin, \
    URLPropertyMixin, ProtocolResponseMixin, BaseResponse, BaseRequest
from wpull.errors import ProtocolError
from wpull.namevalue import NameValueRecord
import wpull.string

class RawRequest(BaseRequest, SerializableMixin, DictableMixin):
    '''Represents an HTTP request.

    Attributes:
        method (str): The HTTP method in the status line. For example,
            ``GET``, ``POST``.
        resource_path (str): The URL or "path" in the status line.
        version (str): The HTTP version in the status line. For example,
            ``HTTP/1.0``.
        fields (:class:`.namevalue.NameValueRecord`): The fields in
            the HTTP header.
        body (:class:`.body.Body`, file-like, None): An optional payload.
        encoding (str): The encoding of the status line.
    '''
    def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
        super().__init__()
        self.method = method
        self.resource_path = resource_path
        self.version = version
        self.fields = NameValueRecord(encoding='latin-1')
        self.body = None
        self.encoding = 'latin-1'

    def to_dict(self):
        '''Return a dict describing this request.

        The body is converted with ``call_to_dict_or_none`` and the
        fields are listed from ``fields.get_all()``.
        '''
        return {
            'protocol': 'http',
            'method': self.method,
            'version': self.version,
            'resource_path': self.resource_path,
            'fields': list(self.fields.get_all()),
            'body': self.call_to_dict_or_none(self.body),
            'encoding': self.encoding,
        }

    def to_bytes(self):
        '''Serialize the status line and the header fields to bytes.

        ``method``, ``resource_path`` and ``version`` must all be set.
        The status line is encoded with ``encoding``; the body is not
        included.
        '''
        assert self.method
        assert self.resource_path
        assert self.version

        status = '{0} {1} {2}'.format(
            self.method, self.resource_path, self.version
        ).encode(self.encoding)
        fields = self.fields.to_bytes(errors='replace')

        # The trailing empty element makes the output end with CRLF.
        return b'\r\n'.join([status, fields, b''])

    def parse(self, data):
        '''Parse header bytes into this request.

        The first line is consumed as the status line only when
        ``resource_path`` has not been set yet; everything else is handed
        to ``fields.parse`` in non-strict mode.

        Args:
            data (bytes): The raw header bytes.

        Raises:
            ProtocolError: If a status line is expected but malformed.
        '''
        if not self.resource_path:
            # partition() instead of an unpacked split(): input without
            # any newline is then validated by parse_status_line() rather
            # than failing with an incidental ValueError on unpacking.
            line, _, data = data.partition(b'\n')
            self.method, self.resource_path, self.version = \
                self.parse_status_line(line)

        self.fields.parse(data, strict=False)

    def parse_status_line(self, data):
        '''Parse the status line bytes.

        Args:
            data (bytes): The status line.

        Returns:
            tuple: A tuple representing the method, URI, and version.

        Raises:
            ProtocolError: If the line does not match the expected format.
        '''
        match = re.match(
            br'([a-zA-Z]+)[ \t]+([^ \t]+)[ \t]+(HTTP/\d+\.\d+)',
            data
        )

        if not match:
            raise ProtocolError('Error parsing status line.')

        # The pattern has exactly three groups, so a successful match
        # always yields three values.
        method, resource_path, version = match.groups()

        return wpull.string.to_str(
            (method, resource_path, version),
            encoding=self.encoding,
        )

    def __repr__(self):
        return '<Request({method}, {url}, {version})>'.format(
            method=self.method, url=self.resource_path, version=self.version
        )

    def copy(self):
        '''Return a copy.'''
        return copy.deepcopy(self)

    def set_continue(self, offset):
        '''Modify the request into a range request.

        Args:
            offset (int): The non-negative byte offset to start from. It
                is written into the ``Range`` field as ``bytes=<offset>-``.
        '''
        assert offset >= 0, offset
        self.fields['Range'] = 'bytes={0}-'.format(offset)
class Request(RawRequest):
    '''Represents a higher level of HTTP request.

    Attributes:
        address (tuple): An address tuple suitable for
            :func:`socket.connect`.
        username (str): Username for HTTP authentication.
        password (str): Password for HTTP authentication.
    '''
    def __init__(self, url=None, method='GET', version='HTTP/1.1'):
        # The URL doubles as the initial resource path of the raw request.
        super().__init__(method=method, resource_path=url, version=version)

        self.address = None
        self.username = None
        self.password = None

        if url:
            self.url = url

    def to_dict(self):
        '''Return the raw request dict extended with ``url`` and
        ``url_info`` entries.

        ``url_info`` is ``None`` when no URL info is available.
        '''
        serialized = super().to_dict()
        info = self._url_info

        serialized['url'] = self._url
        serialized['url_info'] = info.to_dict() if info else None

        return serialized

    def prepare_for_send(self, full_url=False):
        '''Modify the request to be suitable for HTTP server.

        A ``Host`` field is added from the URL if the request has none,
        and ``resource_path`` is rewritten from the URL.

        Args:
            full_url (bool): Use full URL as the URI. By default,
                only the path of the URL is given to the server.
        '''
        assert self.url
        assert self.method
        assert self.version

        parsed = self.url_info

        if 'Host' not in self.fields:
            self.fields['Host'] = parsed.hostname_with_port

        if full_url:
            self.resource_path = parsed.url
        elif parsed.query:
            self.resource_path = '{0}?{1}'.format(parsed.path, parsed.query)
        else:
            self.resource_path = parsed.path

    def parse(self, data):
        '''Parse header bytes, then derive the URL if none is set.

        A resource path starting with ``/`` is combined with the ``Host``
        field into an ``http://`` URL; a resource path starting with
        ``http`` is used as the URL directly. Otherwise the URL is left
        unset.
        '''
        super().parse(data)

        if self._url:
            return

        assert self.resource_path

        path = self.resource_path

        if path[0:1] == '/' and 'Host' in self.fields:
            self.url = 'http://{0}{1}'.format(self.fields['Host'], path)
        elif path.startswith('http'):
            self.url = path
class Response(BaseResponse, SerializableMixin, DictableMixin):
    '''Represents the HTTP response.

    Attributes:
        status_code (int): The status code in the status line.
        reason (str): The status reason string in the status line.
        version (str): The HTTP version in the status line. For example,
            ``HTTP/1.1``.
        fields (:class:`.namevalue.NameValueRecord`): The fields in
            the HTTP headers (and trailer, if present).
        body (:class:`.body.Body`, file-like, None): The optional payload
            (without any transfer or content encoding).
        request: The corresponding request.
        encoding (str): The encoding of the status line.
    '''
    def __init__(self, status_code=None, reason=None, version='HTTP/1.1',
                 request=None):
        super().__init__()

        # A status code, when supplied, must be an int and must come with
        # a reason phrase.
        if status_code is not None:
            assert isinstance(status_code, int), \
                'Expect int, got {}'.format(type(status_code))
            assert reason is not None

        self.status_code = status_code
        self.reason = reason
        self.version = version
        self.fields = NameValueRecord(encoding='latin-1')
        self.request = request
        self.encoding = 'latin-1'

    @property
    def protocol(self):
        return 'http'

    def to_dict(self):
        '''Return a dict describing this response.

        ``response_code`` and ``response_message`` carry the same values
        as ``status_code`` and ``reason``.
        '''
        return {
            'protocol': 'http',
            'status_code': self.status_code,
            'reason': self.reason,
            'response_code': self.status_code,
            'response_message': self.reason,
            'version': self.version,
            'fields': list(self.fields.get_all()),
            'body': self.call_to_dict_or_none(self.body),
            'request': self.request.to_dict() if self.request else None,
            'encoding': self.encoding,
        }

    def to_bytes(self):
        '''Serialize the status line and the header fields to bytes.

        ``version``, ``status_code`` and ``reason`` must all be set. The
        status line is encoded with ``encoding``; the body is not included.
        '''
        assert self.version
        assert self.status_code is not None
        assert self.reason is not None

        status = '{0} {1} {2}'.format(
            self.version, self.status_code, self.reason
        ).encode(self.encoding)
        fields = self.fields.to_bytes(errors='replace')

        # The trailing empty element makes the output end with CRLF.
        return b'\r\n'.join([status, fields, b''])

    def parse(self, data):
        '''Parse header bytes into this response.

        The first line is consumed as the status line only when
        ``status_code`` has not been set yet; everything else is handed to
        ``fields.parse`` in non-strict mode.

        Args:
            data (bytes): The raw header bytes.

        Raises:
            ProtocolError: If a status line is expected but malformed.
        '''
        if self.status_code is None:
            # partition() instead of an unpacked split(): input without
            # any newline is then validated by parse_status_line() rather
            # than failing with an incidental ValueError on unpacking.
            line, _, data = data.partition(b'\n')
            self.version, self.status_code, self.reason = \
                self.parse_status_line(line)

        self.fields.parse(data, strict=False)

    @classmethod
    def parse_status_line(cls, data):
        '''Parse the status line bytes.

        Args:
            data (bytes): The status line.

        Returns:
            tuple: A tuple representing the version, code, and reason.

        Raises:
            ProtocolError: If the line does not match the expected format.
        '''
        match = re.match(
            br'(HTTP/\d+\.\d+)[ \t]+([0-9]{1,3})[ \t]*([^\r\n]*)',
            data
        )

        if not match:
            raise ProtocolError(
                'Error parsing status line {line}.'.format(line=ascii(data))
            )

        # The pattern has exactly three groups, so a successful match
        # always yields three values.
        version, code, reason = match.groups()

        return wpull.string.to_str(
            (version, int(code), reason),
            encoding='latin-1',
        )

    def __repr__(self):
        return '<Response({version}, {code}, {reason})>'.format(
            version=ascii(self.version), code=self.status_code,
            reason=ascii(self.reason)
        )

    def __str__(self):
        return wpull.string.printable_str(
            self.to_bytes().decode('utf-8', 'replace'), keep_newlines=True
        )

    def response_code(self):
        '''Return the status code.'''
        return self.status_code

    def response_message(self):
        '''Return the status reason string.'''
        return self.reason