Source code for requests_arcgis_auth.arcgis_saml_auth


"""
.. module:: arcgis_saml_auth
    :platform: Windows
    :synopsis: Used for Authentication to a portal with SAML authentication (enterprise logins)
"""
from datetime import datetime
from exceptions import ValueError

import requests
from requests.auth import AuthBase
from requests_kerberos import HTTPKerberosAuth, OPTIONAL
from requests_ntlm import HttpNtlmAuth
from bs4 import BeautifulSoup                               # required - pip install --trusted-host pypi.python.org beautifulsoup4
import re

from urlparse import urlparse
import json

from arcgis_exceptions import TokenAuthenticationError, TokenAuthenticationWarning


class ArcGISPortalSAMLAuth(AuthBase):
    """Auth handler for Portal for ArcGIS and ArcGIS Online (AGOL) with enterprise logins (SAML).

    Esri ArcGIS Online (AGOL) and Portal for ArcGIS authentication handler to
    be used with the Python Requests package. Specifically designed to work
    with portals that are federated to a SAML based identity provider with
    'enterprise logins'. It executes the OAuth2 "User login via Application"
    workflow (Authorization Code Grant) as documented at
    http://resources.arcgis.com/en/help/arcgis-rest-api/index.html#//02r30000009z000000

    This auth handler supports a SAML service that has Kerberos authentication
    enabled. A custom SAML auth handler can be provided with the 'saml_auth'
    parameter (example: for forms based login).

    Args:
        client_id (:obj:`str`): A 'client ID' of a registered application in
            the portal.
        capture_request_history (:obj:`bool`, Optional): Specifies if request
            history should be captured in the 'history' attribute
            (default: False).
        saml_auth (:obj:`<Requests_Auth_Handler>`, Optional): An
            authentication handler for the SAML identity provider. Defaults
            to a new `HTTPKerberosAuth(mutual_authentication=OPTIONAL)`
            handler per instance. Allows a developer to write their own
            handler to tie into the SAML handler if the SAML service supports
            a different authentication scheme.
        expiration (:obj:`int`, Optional): Specifies the desired expiration
            sent to the portal 'authorize' endpoint. Defaults to 120
            (minutes).
        verify (:obj:`bool`, Optional): Verify SSL certificates
            (default: True). Use caution disabling this (not recommended for
            production use).

    Attributes:
        redirect_uri (:obj:`str`): The redirect URI of the registered
            application (defaults to 'urn:ietf:wg:oauth:2.0:oob'). Leave this
            alone unless the redirect URI has been custom configured on the
            registered application.
        saml_headers (:obj:`dict`): A dictionary of headers for the SAML
            service.
        saml_auth: The auth handler used for requests to the SAML service.
        history (:obj:`list`): Responses of the handler's own HTTP requests
            (only populated when `capture_request_history` is True).
    """

    # Sentinel meaning "caller did not supply saml_auth". A handler object in
    # the signature would be created once at definition time and shared by
    # every instance of this class.
    _DEFAULT_SAML_AUTH = object()

    def __init__(self, client_id, capture_request_history=False,
                 saml_auth=_DEFAULT_SAML_AUTH, expiration=120, verify=True):
        self.client_id = client_id
        self.capture_request_history = capture_request_history
        self.history = []
        self.expiration = expiration            # Defaults to 2 hours (120 min)
        self.verify = verify
        self.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'

        # DOI SAML service required these headers for single-sign-on.
        # Developers can explicitly over-write if needed...
        self.saml_headers = {"User-Agent":"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET4.0C; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0E; InfoPath.3)"}

        # DOI SAML service did not support 'REQUIRED' mutual authentication.
        # HTTPKerberosAuth threw a 'mutual authentication exception'.
        # Developers can explicitly over-write if needed (or even add NTLM or
        # some other 3rd party auth handler to the SAML communications).
        if saml_auth is self._DEFAULT_SAML_AUTH:
            saml_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
        self.saml_auth = saml_auth

        ### Derived Fields ###
        self._verify_cert = True
        self._base_url = None           # Expected to be - https://host/<instance>/sharing/rest where <instance> is optional
        self._oauth_info = None         # Derived from the portal "authorize" endpoint
        self._saml_code = None          # Derived from the SAML login
        self._token_data = None         # Derived from the portal "token" endpoint
        self._token_acquired = None     # datetime of the last successful token response

    def __call__(self, prepared_request):
        """Attach a portal token to an outgoing request (Requests auth hook).

        Authenticates on first use, refreshes the access token once its
        lifetime has elapsed, and registers a response hook that retries the
        request when the portal reports an invalid token.

        Args:
            prepared_request: The `requests.PreparedRequest` being sent.

        Returns:
            The same request, forced to POST, with the `token` parameter set.

        Raises:
            TokenAuthenticationError: If authentication against the portal or
                the SAML service fails.
            TokenAuthenticationWarning: If no access token is available.
        """
        # Initialize on the first call. Also re-run when an earlier attempt
        # failed part-way: _base_url is set before the token is acquired, so
        # checking it alone would skip initialization and crash below.
        if (self._base_url is None or self._token_data is None
                or self._token_acquired is None):
            self._init(prepared_request.url)

        # Handle expired tokens reported by the server
        prepared_request.register_hook('response', self._handle_response)

        # Check token expiration and re-acquire if needed
        if self._token_is_expired():
            self._refresh_access_token(prepared_request.url)

        return self._add_token_to_request(prepared_request)

    def _token_is_expired(self):
        """Return True when the access token's reported lifetime has elapsed."""
        try:
            # 'expires_in' is expressed in SECONDS by the OAuth2 token endpoint.
            lifetime = float(self._token_data.get("expires_in"))
        except (TypeError, ValueError):
            # Lifetime unknown: do not guess; an invalid token is still
            # caught by the response hook (_handle_response).
            return False
        age = datetime.now() - self._token_acquired
        return age.total_seconds() >= lifetime

    def _refresh_access_token(self, url):
        """Obtain a new access token, preferring the refresh token.

        Falls back to the full SAML login when there is no refresh token or
        when the portal rejects it (for example because it has expired).
        """
        refresh_token = self._token_data.get("refresh_token")
        if refresh_token is not None:
            payload = {
                "client_id": self.client_id,
                "grant_type": "refresh_token",
                "refresh_token": refresh_token}
            try:
                self._get_access_token(payload)
            except TokenAuthenticationError:
                pass        # fall through to a full re-authentication
            else:
                if self._token_data.get("access_token") is not None:
                    return
        self._init(url)

    def _init(self, url):
        """Run the complete login workflow (first request and re-authentication)."""
        if self._base_url is None:
            self._derive_base_url(url)
        self._get_portal_authorization_info()
        self._authenticate_with_saml()
        self._get_portal_tokens()

    def _derive_base_url(self, url):
        """Derive `https://host[/instance]/sharing/rest` from any portal URL.

        Need to support the following example URL's:
            https://ORG.maps.arcgis.com/
            https://ORG.maps.arcgis.com
            https://ORG.maps.arcgis.com/sharing/rest
            https://ORG.maps.arcgis.com/sharing/rest/
            https://ORG.maps.arcgis.com/sharing/rest/portals/self
            https://ORG.maps.arcgis.com/instance
            https://ORG.maps.arcgis.com/instance/
            https://ORG.maps.arcgis.com/instance/sharing/rest
            https://ORG.maps.arcgis.com/instance/sharing/rest/
            https://ORG.maps.arcgis.com/instance/sharing/rest/portals/self
            https://fqdn/instance
            https://fqdn/instance/
            https://fqdn/instance/sharing/rest
            https://fqdn/instance/sharing/rest/
            https://fqdn/instance/sharing/rest/portals/self
        """
        up = urlparse(url)
        path_splt = up.path.split("/")
        # The first path segment is the instance name unless it is already
        # the 'sharing' segment (or the path is empty).
        instance = path_splt[1] if len(path_splt) > 1 else "sharing"
        if instance.lower() != "sharing" and instance != "":
            instance = "/{0}".format(instance)
        else:
            instance = ""
        # Drop params/query/fragment so endpoint paths can be appended safely.
        final_up = up._replace(path="{0}/sharing/rest".format(instance),
                               params="", query="", fragment="")
        self._base_url = final_up.geturl()

    def _get_portal_authorization_info(self):
        """Obtain the identity provider URL (idpAuthorizeUrl) and OAuth state (oauth_state).

        http://resources.arcgis.com/en/help/arcgis-rest-api/index.html#/Authorize/02r300000214000000/

        Raises:
            TokenAuthenticationError: If the 'authorize' endpoint does not
                answer with HTTP 200 or no oAuthInfo object can be parsed.
        """
        ERROR_STRING = "Unable to obtain portal authorization information"

        # Forget results of an earlier login so stale data cannot mask a failure
        self._oauth_info = None

        # Execute request to portal 'authorize' end-point
        portal_auth_url = self._base_url + "/oauth2/authorize"
        params = {
            'client_id': self.client_id,
            'response_type': 'code',
            'expiration': self.expiration,
            'redirect_uri': self.redirect_uri}
        response = requests.get(portal_auth_url, params=params, verify=self.verify)
        if self.capture_request_history:
            self.history.append(response)
        if response.status_code != 200:
            raise TokenAuthenticationError("{err}; HTTP Status Code {sc} from {url}".format(err=ERROR_STRING,sc=response.status_code,url=portal_auth_url))

        # Parse the response and obtain authentication information, which the
        # page embeds as a JavaScript object literal inside a <script> tag.
        pattern = re.compile('var oAuthInfo = ({.*?});', re.DOTALL)
        soup = BeautifulSoup(response.text, 'html.parser')
        for script in soup.find_all('script'):
            # script.string is None when the tag has no single text child
            matches = pattern.search(script.string or "")
            if matches is None:
                continue
            try:
                self._oauth_info = json.loads(matches.group(1))
            except ValueError:
                raise TokenAuthenticationError("{err}; unable to parse response to obtain oAuthInfo".format(err=ERROR_STRING))
            break

        if not self._oauth_info:
            raise TokenAuthenticationError("{err}; unable to parse response to obtain oAuthInfo".format(err=ERROR_STRING))

    def _authenticate_with_saml(self):
        """Authenticate with the SAML service and obtain the SAML code.

        Raises:
            TokenAuthenticationError: If the identity provider URL is unknown,
                a request does not answer with HTTP 200, or the expected
                form / code element is missing from a response.
        """
        ERROR_STRING = r'Unable to authenticate with SAML Service'

        # Forget the code of an earlier login
        self._saml_code = None

        # Execute request to SAML service
        try:
            idp_url = self._oauth_info.get("federationInfo").get("idpAuthorizeUrl")
        except AttributeError:
            # oAuthInfo (or its federationInfo member) is not an object
            idp_url = None
        if not idp_url:
            raise TokenAuthenticationError("{err}; unable to determine IDP Authorization URL from {json}".format(err=ERROR_STRING,json=self._oauth_info))

        payload = {'oauth_state': self._oauth_info.get('oauth_state')}
        response = requests.post(idp_url, data=payload, auth=self.saml_auth,
                                 headers=self.saml_headers, allow_redirects=True,
                                 verify=self.verify)
        if self.capture_request_history:
            self.history.append(response)
        if response.status_code != 200:
            raise TokenAuthenticationError("{err}; HTTP Status Code {sc} from {url}".format(err=ERROR_STRING,sc=response.status_code,url=idp_url))

        # Parse the response: the identity provider answers with a hidden,
        # auto-submitting form that has to be posted on.
        soup = BeautifulSoup(response.text, 'html.parser')
        form = soup.find('form', {'name': 'hiddenform'})
        url = form.get('action') if form is not None else None
        if not url:
            raise TokenAuthenticationError("{err}; Unable to acquire SAML code from {url}".format(err=ERROR_STRING,url=idp_url))

        # Get all of the named input fields (inputs without a value post as empty)
        input_elements = form.find_all('input', {'name': True})
        post_data = {el['name']: el.get('value', '') for el in input_elements}

        # Submit the form and hopefully get our code value
        response = requests.post(url, data=post_data, allow_redirects=True,
                                 auth=self.saml_auth, verify=self.verify)
        if self.capture_request_history:
            self.history.append(response)
        if response.status_code != 200:
            raise TokenAuthenticationError("{err}; HTTP Status Code {sc} from {url}".format(err=ERROR_STRING,sc=response.status_code,url=url))

        code_element = BeautifulSoup(response.text, 'html.parser').find(id='code')
        try:
            self._saml_code = code_element['value']
        except (TypeError, KeyError):
            # TypeError: no element with id 'code'; KeyError: it has no value
            raise TokenAuthenticationError("{err}; Unable to acquire SAML code from {url}".format(err=ERROR_STRING,url=idp_url))

    def _get_portal_tokens(self):
        """Provide the SAML code to the portal and acquire the access_token and refresh_token."""
        payload = {
            'client_id': self.client_id,
            'code': self._saml_code,
            'redirect_uri': self.redirect_uri,
            'grant_type': 'authorization_code'}
        self._get_access_token(payload)

    def _get_access_token(self, params):
        """Acquire an access token from the portal 'token' endpoint.

        http://resources.arcgis.com/en/help/arcgis-rest-api/index.html#/Token/02r300000213000000/

        Args:
            params (dict): Form fields for the token request (grant type,
                client id and either the code or the refresh token).

        Raises:
            TokenAuthenticationError: If the endpoint does not answer with
                HTTP 200.
        """
        ERROR_STRING = r'Unable to obtain portal access_token'

        portal_token_url = self._base_url + "/oauth2/token"
        response = requests.post(portal_token_url, data=params, verify=self.verify)
        acquired = datetime.now()
        if self.capture_request_history:
            self.history.append(response)
        if response.status_code != 200:
            raise TokenAuthenticationError("{err}; HTTP Status Code {sc} from {url}".format(err=ERROR_STRING,sc=response.status_code,url=portal_token_url))

        token_data = response.json()
        # A refresh response carries no refresh_token of its own; keep the one
        # we already hold so the next refresh is still possible.
        previous = self._token_data
        if (params.get("grant_type") == "refresh_token"
                and isinstance(token_data, dict) and isinstance(previous, dict)
                and token_data.get("refresh_token") is None
                and previous.get("refresh_token") is not None):
            token_data["refresh_token"] = previous["refresh_token"]

        # Only record the acquisition time together with the data it belongs to
        self._token_data = token_data
        self._token_acquired = acquired

    def _handle_response(self, resp, **kwargs):
        """Response hook: re-authenticate and retry when the token is rejected.

        Args:
            resp: The `requests.Response` received for the original request.
            **kwargs: Send arguments forwarded by Requests to the hook.

        Returns:
            The retried response when the token was invalid, otherwise `resp`.
        """
        # Check for actual HTTP Status code 498 (when f=json is not supplied)
        if resp.status_code == 498:
            return self._handle_expired_token(resp, **kwargs)

        # Check for JSON error (vendor spec)
        # ex: {u'error': {u'code': 498, u'details': [], u'message': u'Invalid token.'}}
        try:
            body = resp.json()
        except ValueError:
            # Unable to parse JSON data... requestor could ask for non-JSON
            # formatted data, so this is not an error.
            return resp

        err = body.get("error") if isinstance(body, dict) else None
        if (isinstance(err, dict) and err.get("code") == 498
                and err.get("message") == "Invalid token."):
            return self._handle_expired_token(resp, **kwargs)

        # Any other error (e.g. 403 'You do not have permissions to access
        # this resource or perform this operation.') is left to the caller.
        return resp

    def _handle_expired_token(self, resp, **kwargs):
        """Handle an expired token by re-authenticating and re-sending the request."""
        # Consume the body and release the connection before reusing it
        # (same pattern as Requests' own HTTPDigestAuth retry).
        resp.content
        resp.close()

        req = resp.request.copy()
        self._init(req.url)
        req = self._add_token_to_request(req)
        response = resp.connection.send(req, **kwargs)
        if self.capture_request_history:
            self.history.append(response)
        return response

    def _add_token_to_request(self, prepared_request):
        """Add the access token to the request's query string.

        Forces the request to POST (possible future implications here, like
        if a request only supports GET) and replaces any 'token' parameter
        already present in the URL.

        Raises:
            TokenAuthenticationWarning: If no access token is available.
        """
        try:
            from urlparse import parse_qsl          # Python 2
        except ImportError:
            from urllib.parse import parse_qsl      # Python 3

        access_token = self._token_data.get("access_token")
        if access_token is None:
            raise TokenAuthenticationWarning("Unable to add the access_token to the request;")

        prepared_request.method = "POST"
        params = {"token": access_token}

        up = urlparse(prepared_request.url)
        query = up.query
        if not isinstance(query, str):
            # Python 2 unicode: decode percent-escapes from UTF-8 bytes so
            # non-ASCII values survive the round trip.
            query = query.encode("utf-8")

        # The query is already percent-encoded. Decode it before handing it
        # back to prepare_url(), which encodes again; this also copes with
        # values containing '=' and with bare keys.
        for key, value in parse_qsl(query, keep_blank_values=True):
            # Remove the token from the request QUERY if it already exists...
            if key.lower() != "token":
                params[key] = value

        up = up._replace(query="")
        prepared_request.prepare_url(up.geturl(), params=params)
        return prepared_request