diff --git a/passbook/app_gw/middleware.py b/passbook/app_gw/middleware.py
index 53ab5d617..919e5c6b5 100644
--- a/passbook/app_gw/middleware.py
+++ b/passbook/app_gw/middleware.py
@@ -1,241 +1,33 @@
 """passbook app_gw middleware"""
-import mimetypes
-from logging import getLogger
-from random import SystemRandom
-from urllib.parse import urlparse
-
-import certifi
-import urllib3
-from django.core.cache import cache
-from django.utils.http import urlencode
 from django.views.generic import RedirectView
-from revproxy.exceptions import InvalidUpstream
 
-from passbook.app_gw.models import ApplicationGatewayProvider
-from passbook.app_gw.proxy.response import get_django_response
-from passbook.app_gw.proxy.utils import encode_items, normalize_request_headers
-from passbook.app_gw.rewrite import Rewriter
-from passbook.core.models import Application
-from passbook.core.policies import PolicyEngine
+from passbook.app_gw.proxy.handler import RequestHandler
 from passbook.lib.config import CONFIG
 
-SESSION_UPSTREAM_KEY = 'passbook_app_gw_upstream'
-IGNORED_HOSTNAMES_KEY = 'passbook_app_gw_ignored'
-LOGGER = getLogger(__name__)
-QUOTE_SAFE = r'<.;>\(}*+|~=-$/_:^@)[{]&\'!,"`'
-ERRORS_MESSAGES = {
-    'upstream-no-scheme': ("Upstream URL scheme must be either "
-                           "'http' or 'https' (%s).")
-}
-HTTP_NO_VERIFY = urllib3.PoolManager()
-HTTP = urllib3.PoolManager(
-    cert_reqs='CERT_REQUIRED',
-    ca_certs=certifi.where())
-
-# pylint: disable=too-many-instance-attributes
 class ApplicationGatewayMiddleware:
     """Check if request should be proxied or handeled normally"""
 
-    ignored_hosts = []
-    request = None
-    app_gw = None
-    http = None
-    http_no_verify = None
-    host_header = ''
-
-    _parsed_url = None
-    _request_headers = None
+    _app_gw_cache = {}
 
     def __init__(self, get_response):
         self.get_response = get_response
-        self.ignored_hosts = cache.get(IGNORED_HOSTNAMES_KEY, [])
-
-    def precheck(self, request):
-        """Check if a request should be proxied or forwarded to passbook"""
-        # Check if hostname is in cached list of ignored hostnames
-        # This saves us having to query the database on each request
-        self.host_header = request.META.get('HTTP_HOST')
-        if self.host_header in self.ignored_hosts:
-            LOGGER.debug("%s is ignored", self.host_header)
-            return True, None
-        # Look through all ApplicationGatewayProviders and check hostnames
-        matches = ApplicationGatewayProvider.objects.filter(
-            server_name__contains=[self.host_header],
-            enabled=True)
-        if not matches.exists():
-            # Mo matching Providers found, add host header to ignored list
-            self.ignored_hosts.append(self.host_header)
-            cache.set(IGNORED_HOSTNAMES_KEY, self.ignored_hosts)
-            LOGGER.debug("Ignoring %s", self.host_header)
-            return True, None
-        # At this point we're certain there's a matching ApplicationGateway
-        if len(matches) > 1:
-            # This should never happen
-            raise ValueError
-        app_gw = matches.first()
-        try:
-            # Check if ApplicationGateway is associated with application
-            getattr(app_gw, 'application')
-            if app_gw:
-                return False, app_gw
-        except Application.DoesNotExist:
-            LOGGER.debug("ApplicationGateway not associated with Application")
-        return True, None
 
     def __call__(self, request):
-        forward, self.app_gw = self.precheck(request)
-        if forward:
-            return self.get_response(request)
-        self.request = request
-        return self.dispatch(request)
+        # Rudimentary cache
+        host_header = request.META.get('HTTP_HOST')
+        if host_header not in self._app_gw_cache:
+            self._app_gw_cache[host_header] = RequestHandler.find_app_gw_for_request(request)
+        if self._app_gw_cache[host_header]:
+            return self.dispatch(request, self._app_gw_cache[host_header])
+        return self.get_response(request)
 
-    def _get_upstream(self):
-        """Choose random upstream and save in session"""
-        if SESSION_UPSTREAM_KEY not in self.request.session:
-            self.request.session[SESSION_UPSTREAM_KEY] = {}
-        if self.app_gw.pk not in self.request.session[SESSION_UPSTREAM_KEY]:
-            upstream_index = SystemRandom().randrange(len(self.app_gw.upstream))
-            self.request.session[SESSION_UPSTREAM_KEY][self.app_gw.pk] = upstream_index
-        return self.app_gw.upstream[self.request.session[SESSION_UPSTREAM_KEY][self.app_gw.pk]]
-
-    def get_upstream(self):
-        """Get upstream as parsed url"""
-        upstream = self._get_upstream()
-
-        self._parsed_url = urlparse(upstream)
-
-        if self._parsed_url.scheme not in ('http', 'https'):
-            raise InvalidUpstream(ERRORS_MESSAGES['upstream-no-scheme'] %
-                                  upstream)
-
-        return upstream
-
-    def _format_path_to_redirect(self, request):
-        LOGGER.debug("Path before: %s", request.get_full_path())
-        rewriter = Rewriter(self.app_gw, request)
-        after = rewriter.build()
-        LOGGER.debug("Path after: %s", after)
-        return after
-
-    def get_proxy_request_headers(self, request):
-        """Get normalized headers for the upstream
-        Gets all headers from the original request and normalizes them.
-        Normalization occurs by removing the prefix ``HTTP_`` and
-        replacing and ``_`` by ``-``. Example: ``HTTP_ACCEPT_ENCODING``
-        becames ``Accept-Encoding``.
-        .. versionadded:: 0.9.1
-        :param request: The original HTTPRequest instance
-        :returns: Normalized headers for the upstream
-        """
-        return normalize_request_headers(request)
-
-    def get_request_headers(self):
-        """Return request headers that will be sent to upstream.
-        The header REMOTE_USER is set to the current user
-        if AuthenticationMiddleware is enabled and
-        the view's add_remote_user property is True.
-        .. versionadded:: 0.9.8
-        """
-        request_headers = self.get_proxy_request_headers(self.request)
-        if not self.app_gw.authentication_header:
-            return request_headers
-        request_headers[self.app_gw.authentication_header] = self.request.user.get_username()
-        LOGGER.info("%s set", self.app_gw.authentication_header)
-
-        return request_headers
-
-    def check_permission(self):
-        """Check if user is authenticated and has permission to access app"""
-        if not hasattr(self.request, 'user'):
-            return False
-        if not self.request.user.is_authenticated:
-            return False
-        policy_engine = PolicyEngine(self.app_gw.application.policies.all())
-        policy_engine.for_user(self.request.user).with_request(self.request).build()
-        passing, _messages = policy_engine.result
-
-        return passing
-
-    def get_encoded_query_params(self):
-        """Return encoded query params to be used in proxied request"""
-        get_data = encode_items(self.request.GET.lists())
-        return urlencode(get_data)
-
-    def _created_proxy_response(self, request, path):
-        request_payload = request.body
-
-        LOGGER.debug("Request headers: %s", self._request_headers)
-
-        request_url = self.get_upstream() + path
-        LOGGER.debug("Request URL: %s", request_url)
-
-        if request.GET:
-            request_url += '?' + self.get_encoded_query_params()
-            LOGGER.debug("Request URL: %s", request_url)
-
-        http = HTTP
-        if not self.app_gw.upstream_ssl_verification:
-            http = HTTP_NO_VERIFY
-
-        try:
-            proxy_response = http.urlopen(request.method,
-                                          request_url,
-                                          redirect=False,
-                                          retries=None,
-                                          headers=self._request_headers,
-                                          body=request_payload,
-                                          decode_content=False,
-                                          preload_content=False)
-            LOGGER.debug("Proxy response header: %s",
-                         proxy_response.getheaders())
-        except urllib3.exceptions.HTTPError as error:
-            LOGGER.exception(error)
-            raise
-
-        return proxy_response
-
-    def _replace_host_on_redirect_location(self, request, proxy_response):
-        location = proxy_response.headers.get('Location')
-        if location:
-            if request.is_secure():
-                scheme = 'https://'
-            else:
-                scheme = 'http://'
-            request_host = scheme + self.host_header
-
-            upstream_host_http = 'http://' + self._parsed_url.netloc
-            upstream_host_https = 'https://' + self._parsed_url.netloc
-
-            location = location.replace(upstream_host_http, request_host)
-            location = location.replace(upstream_host_https, request_host)
-            proxy_response.headers['Location'] = location
-            LOGGER.debug("Proxy response LOCATION: %s",
-                         proxy_response.headers['Location'])
-
-    def _set_content_type(self, request, proxy_response):
-        content_type = proxy_response.headers.get('Content-Type')
-        if not content_type:
-            content_type = (mimetypes.guess_type(request.path)[0] or
-                            self.app_gw.default_content_type)
-            proxy_response.headers['Content-Type'] = content_type
-        LOGGER.debug("Proxy response CONTENT-TYPE: %s",
-                     proxy_response.headers['Content-Type'])
-
-    def dispatch(self, request):
+    def dispatch(self, request, app_gw):
         """Build proxied request and pass to upstream"""
-        if not self.check_permission():
+        handler = RequestHandler(app_gw, request)
+
+        if not handler.check_permission():
             to_url = 'https://%s/?next=%s' % (CONFIG.get('domains')[0],
                                               request.get_full_path())
             return RedirectView.as_view(url=to_url)(request)
 
-        self._request_headers = self.get_request_headers()
-
-        path = self._format_path_to_redirect(request)
-        proxy_response = self._created_proxy_response(request, path)
-
-        self._replace_host_on_redirect_location(request, proxy_response)
-        self._set_content_type(request, proxy_response)
-        response = get_django_response(proxy_response, strict_cookies=False)
-
-        LOGGER.debug("RESPONSE RETURNED: %s", response)
-        return response
+        return handler.get_response()
diff --git a/passbook/app_gw/proxy/handler.py b/passbook/app_gw/proxy/handler.py
new file mode 100644
index 000000000..2f63ac6ad
--- /dev/null
+++ b/passbook/app_gw/proxy/handler.py
@@ -0,0 +1,222 @@
+"""passbook app_gw request handler"""
+import mimetypes
+from logging import getLogger
+from random import SystemRandom
+from urllib.parse import urlparse
+
+import certifi
+import urllib3
+from django.core.cache import cache
+from django.utils.http import urlencode
+
+from passbook.app_gw.models import ApplicationGatewayProvider
+from passbook.app_gw.proxy.exceptions import InvalidUpstream
+from passbook.app_gw.proxy.response import get_django_response
+from passbook.app_gw.proxy.utils import encode_items, normalize_request_headers
+from passbook.app_gw.rewrite import Rewriter
+from passbook.core.models import Application
+from passbook.core.policies import PolicyEngine
+
+SESSION_UPSTREAM_KEY = 'passbook_app_gw_upstream'
+IGNORED_HOSTNAMES_KEY = 'passbook_app_gw_ignored'
+LOGGER = getLogger(__name__)
+QUOTE_SAFE = r'<.;>\(}*+|~=-$/_:^@)[{]&\'!,"`'
+ERRORS_MESSAGES = {
+    'upstream-no-scheme': ("Upstream URL scheme must be either "
+                           "'http' or 'https' (%s).")
+}
+HTTP_NO_VERIFY = urllib3.PoolManager()
+HTTP = urllib3.PoolManager(
+    cert_reqs='CERT_REQUIRED',
+    ca_certs=certifi.where())
+IGNORED_HOSTS = cache.get(IGNORED_HOSTNAMES_KEY, [])
+
+
+class RequestHandler:
+    """Forward requests"""
+
+    _parsed_url = None
+    _request_headers = None
+
+    def __init__(self, app_gw, request):
+        self.app_gw = app_gw
+        self.request = request
+
+    @staticmethod
+    def find_app_gw_for_request(request):
+        """Check if a request should be proxied or forwarded to passbook"""
+        # Check if hostname is in cached list of ignored hostnames
+        # This saves us having to query the database on each request
+        host_header = request.META.get('HTTP_HOST')
+        if host_header in IGNORED_HOSTS:
+            LOGGER.debug("%s is ignored", host_header)
+            return False
+        # Look through all ApplicationGatewayProviders and check hostnames
+        matches = ApplicationGatewayProvider.objects.filter(
+            server_name__contains=[host_header],
+            enabled=True)
+        if not matches.exists():
+            # No matching Providers found, add host header to ignored list
+            IGNORED_HOSTS.append(host_header)
+            cache.set(IGNORED_HOSTNAMES_KEY, IGNORED_HOSTS)
+            LOGGER.debug("Ignoring %s", host_header)
+            return False
+        # At this point we're certain there's a matching ApplicationGateway
+        if len(matches) > 1:
+            # This should never happen
+            raise ValueError
+        app_gw = matches.first()
+        try:
+            # Check if ApplicationGateway is associated with application
+            getattr(app_gw, 'application')
+            if app_gw:
+                return app_gw
+        except Application.DoesNotExist:
+            LOGGER.debug("ApplicationGateway not associated with Application")
+        return True
+
+    def _get_upstream(self):
+        """Choose random upstream and save in session"""
+        if SESSION_UPSTREAM_KEY not in self.request.session:
+            self.request.session[SESSION_UPSTREAM_KEY] = {}
+        if self.app_gw.pk not in self.request.session[SESSION_UPSTREAM_KEY]:
+            upstream_index = int(SystemRandom().random() * len(self.app_gw.upstream))
+            self.request.session[SESSION_UPSTREAM_KEY][self.app_gw.pk] = upstream_index
+        return self.app_gw.upstream[self.request.session[SESSION_UPSTREAM_KEY][self.app_gw.pk]]
+
+    def get_upstream(self):
+        """Get upstream as parsed url"""
+        upstream = self._get_upstream()
+
+        self._parsed_url = urlparse(upstream)
+
+        if self._parsed_url.scheme not in ('http', 'https'):
+            raise InvalidUpstream(ERRORS_MESSAGES['upstream-no-scheme'] %
+                                  upstream)
+
+        return upstream
+
+    def _format_path_to_redirect(self):
+        LOGGER.debug("Path before: %s", self.request.get_full_path())
+        rewriter = Rewriter(self.app_gw, self.request)
+        after = rewriter.build()
+        LOGGER.debug("Path after: %s", after)
+        return after
+
+    def get_proxy_request_headers(self):
+        """Get normalized headers for the upstream
+        Gets all headers from the original request and normalizes them.
+        Normalization occurs by removing the prefix ``HTTP_`` and
+        replacing any ``_`` by ``-``. Example: ``HTTP_ACCEPT_ENCODING``
+        becomes ``Accept-Encoding``.
+        .. versionadded:: 0.9.1
+        :param request: The original HTTPRequest instance
+        :returns: Normalized headers for the upstream
+        """
+        return normalize_request_headers(self.request)
+
+    def get_request_headers(self):
+        """Return request headers that will be sent to upstream.
+        The header REMOTE_USER is set to the current user
+        if AuthenticationMiddleware is enabled and
+        the view's add_remote_user property is True.
+        .. versionadded:: 0.9.8
+        """
+        request_headers = self.get_proxy_request_headers()
+        if not self.app_gw.authentication_header:
+            return request_headers
+        request_headers[self.app_gw.authentication_header] = self.request.user.get_username()
+        LOGGER.info("%s set", self.app_gw.authentication_header)
+
+        return request_headers
+
+    def check_permission(self):
+        """Check if user is authenticated and has permission to access app"""
+        if not hasattr(self.request, 'user'):
+            return False
+        if not self.request.user.is_authenticated:
+            return False
+        policy_engine = PolicyEngine(self.app_gw.application.policies.all())
+        policy_engine.for_user(self.request.user).with_request(self.request).build()
+        passing, _messages = policy_engine.result
+
+        return passing
+
+    def get_encoded_query_params(self):
+        """Return encoded query params to be used in proxied request"""
+        get_data = encode_items(self.request.GET.lists())
+        return urlencode(get_data)
+
+    def _created_proxy_response(self, path):
+        request_payload = self.request.body
+
+        LOGGER.debug("Request headers: %s", self._request_headers)
+
+        request_url = self.get_upstream() + path
+        LOGGER.debug("Request URL: %s", request_url)
+
+        if self.request.GET:
+            request_url += '?' + self.get_encoded_query_params()
+            LOGGER.debug("Request URL: %s", request_url)
+
+        http = HTTP
+        if not self.app_gw.upstream_ssl_verification:
+            http = HTTP_NO_VERIFY
+
+        try:
+            proxy_response = http.urlopen(self.request.method,
+                                          request_url,
+                                          redirect=False,
+                                          retries=None,
+                                          headers=self._request_headers,
+                                          body=request_payload,
+                                          decode_content=False,
+                                          preload_content=False)
+            LOGGER.debug("Proxy response header: %s",
+                         proxy_response.getheaders())
+        except urllib3.exceptions.HTTPError as error:
+            LOGGER.exception(error)
+            raise
+
+        return proxy_response
+
+    def _replace_host_on_redirect_location(self, proxy_response):
+        location = proxy_response.headers.get('Location')
+        if location:
+            if self.request.is_secure():
+                scheme = 'https://'
+            else:
+                scheme = 'http://'
+            request_host = scheme + self.request.META.get('HTTP_HOST')
+
+            upstream_host_http = 'http://' + self._parsed_url.netloc
+            upstream_host_https = 'https://' + self._parsed_url.netloc
+
+            location = location.replace(upstream_host_http, request_host)
+            location = location.replace(upstream_host_https, request_host)
+            proxy_response.headers['Location'] = location
+            LOGGER.debug("Proxy response LOCATION: %s",
+                         proxy_response.headers['Location'])
+
+    def _set_content_type(self, proxy_response):
+        content_type = proxy_response.headers.get('Content-Type')
+        if not content_type:
+            content_type = (mimetypes.guess_type(self.request.path)[0] or
+                            self.app_gw.default_content_type)
+            proxy_response.headers['Content-Type'] = content_type
+        LOGGER.debug("Proxy response CONTENT-TYPE: %s",
+                     proxy_response.headers['Content-Type'])
+
+    def get_response(self):
+        """Pass request to upstream and return response"""
+        self._request_headers = self.get_request_headers()
+
+        path = self._format_path_to_redirect()
+        proxy_response = self._created_proxy_response(path)
+
+        self._replace_host_on_redirect_location(proxy_response)
+        self._set_content_type(proxy_response)
+        response = get_django_response(proxy_response, strict_cookies=False)
+
+        LOGGER.debug("RESPONSE RETURNED: %s", response)
+        return response
diff --git a/passbook/app_gw/signals.py b/passbook/app_gw/signals.py
index cb07171eb..163432681 100644
--- a/passbook/app_gw/signals.py
+++ b/passbook/app_gw/signals.py
@@ -6,8 +6,8 @@ from django.core.cache import cache
 from django.db.models.signals import post_save
 from django.dispatch import receiver
 
-from passbook.app_gw.middleware import IGNORED_HOSTNAMES_KEY
 from passbook.app_gw.models import ApplicationGatewayProvider
+from passbook.app_gw.proxy.handler import IGNORED_HOSTNAMES_KEY
 
 LOGGER = getLogger(__name__)
 
diff --git a/passbook/core/policies.py b/passbook/core/policies.py
index e495f018e..4d9c52aea 100644
--- a/passbook/core/policies.py
+++ b/passbook/core/policies.py
@@ -1,4 +1,5 @@
 """passbook core policy engine"""
+import cProfile
 from logging import getLogger
 
 from amqp.exceptions import UnexpectedFrame
@@ -10,6 +11,18 @@ from ipware import get_client_ip
 from passbook.core.celery import CELERY_APP
 from passbook.core.models import Policy, User
 
+
+def profileit(func):
+    def wrapper(*args, **kwargs):
+        datafn = func.__name__ + ".profile" # Name the data file sensibly
+        prof = cProfile.Profile()
+        retval = prof.runcall(func, *args, **kwargs)
+        prof.dump_stats(datafn)
+        return retval
+
+    return wrapper
+
+
 LOGGER = getLogger(__name__)
 
 def _cache_key(policy, user):
@@ -66,6 +79,7 @@ class PolicyEngine:
         self.__request = request
         return self
 
+    @profileit
     def build(self):
         """Build task group"""
         if not self.__user:
@@ -82,16 +96,16 @@
         for policy in self.policies:
             cached_policy = cache.get(_cache_key(policy, self.__user), None)
             if cached_policy:
-                LOGGER.debug("Taking result from cache for %s", policy.pk.hex)
+                LOGGER.warning("Taking result from cache for %s", policy.pk.hex)
                 cached_policies.append(cached_policy)
             else:
-                LOGGER.debug("Evaluating policy %s", policy.pk.hex)
+                LOGGER.warning("Evaluating policy %s", policy.pk.hex)
                 signatures.append(_policy_engine_task.signature(
                     args=(self.__user.pk, policy.pk.hex),
                     kwargs=kwargs,
                     time_limit=policy.timeout))
                 self.__get_timeout += policy.timeout
-        LOGGER.debug("Set total policy timeout to %r", self.__get_timeout)
+        LOGGER.warning("Set total policy timeout to %r", self.__get_timeout)
         # If all policies are cached, we have an empty list here.
         if signatures:
             self.__group = group(signatures)()
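
Note on the profiling hook: the profileit decorator added in passbook/core/policies.py dumps one cProfile data file per call of the wrapped function, named after the function (so decorating PolicyEngine.build writes "build.profile" into the working directory of the Django process). A minimal sketch for inspecting those dumps with the standard-library pstats module is shown below; the file path and sort order are assumptions for illustration, not part of this patch.

# sketch: read a cProfile dump written by the profileit decorator
import pstats


def print_profile(path="build.profile", limit=10):
    """Print the most expensive calls recorded in a cProfile dump."""
    stats = pstats.Stats(path)
    # strip long directory prefixes and sort by cumulative time
    stats.strip_dirs().sort_stats("cumulative").print_stats(limit)


if __name__ == "__main__":
    print_profile()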