gecko-dev/dom/security/nsCSPParser.h

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsCSPParser_h___
#define nsCSPParser_h___

#include "nsCSPUtils.h"
#include "nsIURI.h"
#include "nsString.h"

/**
 * How does the parsing work?
 *
 * We generate tokens by splitting the policy-string by whitespace and semicolon.
 * Internally the tokens are represented as an array of string-arrays:
 *
 *  [
 *    [ name, src, src, src, ... ],
 *    [ name, src, src, src, ... ],
 *    [ name, src, src, src, ... ]
 *  ]
 *
 * for example:
 *  [
 *    [ img-src, http://www.example.com, http://www.test.com ],
 *    [ default-src, 'self'],
 *    [ script-src, 'unsafe-eval', 'unsafe-inline' ],
 *  ]
 *
 * The first element of each array has to be a valid directive-name, otherwise we can
 * ignore the remaining elements of the array. Also, if the
 * directive already exists in the current policy, we can ignore
 * the remaining elements of that array. (http://www.w3.org/TR/CSP/#parsing)
 */

// Tokenized policy: an array of string-arrays, each holding a directive name
// followed by that directive's source tokens.
using cspTokens = nsTArray<nsTArray<nsString>>;

/**
 * Turns a policy string into cspTokens. The constructor is private, so
 * tokenizeCSPPolicy() is the only way to use this class from outside.
 */
class nsCSPTokenizer {

  public:
    static void tokenizeCSPPolicy(const nsAString &aPolicyString, cspTokens& outTokens);

  private:
    nsCSPTokenizer(const char16_t* aStart, const char16_t* aEnd);
    ~nsCSPTokenizer();

    // True once the cursor has reached (or passed) the end of the input.
    inline bool atEnd()
    {
      return mCurChar >= mEndChar;
    }

    // Moves the cursor past any run of ' ' characters and leaves mCurToken
    // empty. Skipped characters are never part of a token, so they are not
    // copied into mCurToken first.
    inline void skipWhiteSpace()
    {
      while (mCurChar < mEndChar && *mCurChar == ' ') {
        ++mCurChar;
      }
      mCurToken.Truncate();
    }

    // Same as skipWhiteSpace(), but also steps over ';' characters.
    inline void skipWhiteSpaceAndSemicolon()
    {
      while (mCurChar < mEndChar && (*mCurChar == ' ' || *mCurChar == ';')) {
        ++mCurChar;
      }
      mCurToken.Truncate();
    }

    // If the character under the cursor equals aChar, appends it to mCurToken,
    // advances the cursor and returns true; otherwise returns false and leaves
    // the state untouched. Returns false at the end of the input.
    inline bool accept(char16_t aChar)
    {
      NS_ASSERTION(mCurChar < mEndChar, "Trying to dereference mEndChar");
      // The assertion only reports misuse; guard explicitly so mEndChar is
      // never dereferenced (mirrors the atEnd() check in nsCSPParser::accept).
      if (atEnd()) {
        return false;
      }
      if (*mCurChar == aChar) {
        mCurToken.Append(*mCurChar++);
        return true;
      }
      return false;
    }

    void generateNextToken();
    void generateTokens(cspTokens& outTokens);

    const char16_t* mCurChar;   // cursor into the policy string
    const char16_t* mEndChar;   // end of the policy string; never dereferenced
    nsString        mCurToken;  // token currently being assembled
};


class nsCSPParser {

  public:
    /**
     * The one and only public entry point of the CSP parser. The policy string
     * is first split into string tokens; policy() then starts the actual parse.
     * The parse functions are called in the order given by the source-list
     * grammar at http://www.w3.org/TR/CSP11/#source-list, similar to a finite
     * state machine: for instance, port() can only be called after host() has
     * already processed any possible host.
     */
    static nsCSPPolicy* parseContentSecurityPolicy(const nsAString &aPolicyString,
                                                   nsIURI *aSelfURI,
                                                   bool aReportOnly,
                                                   uint64_t aInnerWindowID);

  private:
    nsCSPParser(cspTokens& aTokens,
                nsIURI* aSelfURI,
                uint64_t aInnerWindowID);
    ~nsCSPParser();

    // Productions of the source-list grammar, see
    // http://www.w3.org/TR/CSP11/#source-list
    nsCSPPolicy* policy();
    void directive();
    nsCSPDirective* directiveName();
    void directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs);
    void referrerDirectiveValue();
    void sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs);
    nsCSPBaseSrc* sourceExpression();
    nsCSPSchemeSrc* schemeSource();
    nsCSPHostSrc* hostSource();
    nsCSPBaseSrc* keywordSource();
    nsCSPNonceSrc* nonceSource();
    nsCSPHashSrc* hashSource();
    // Helper to support app specific hosts.
    nsCSPHostSrc* appHost();
    nsCSPHostSrc* host();
    bool hostChar();
    bool schemeChar();
    bool port();
    bool path(nsCSPHostSrc* aCspHost);

    // Helper to parse subDomains.
    bool subHost();
    // Helper to parse 'unreserved' characters.
    bool atValidUnreservedChar();
    // Helper to parse 'sub-delims' characters.
    bool atValidSubDelimChar();
    // Helper to parse 'pct-encoded' sequences.
    bool atValidPctEncodedChar();
    // Helper to parse paths.
    bool subPath(nsCSPHostSrc* aCspHost);
    // Helper to parse report-uris.
    void reportURIList(nsTArray<nsCSPBaseSrc*>& outSrcs);
    // Helper to percent-decode aEncStr into outDecStr.
    void percentDecodeStr(const nsAString& aEncStr,
                          nsAString& outDecStr);

    // True once mCurChar has reached (or passed) mEndChar.
    inline bool atEnd()
    {
      return mCurChar >= mEndChar;
    }

    // Consumes the current character if it equals aSymbol; returns whether a
    // character was consumed.
    inline bool accept(char16_t aSymbol)
    {
      return peek(aSymbol) && advance();
    }

    // Consumes the current character if aClassifier accepts it; returns
    // whether a character was consumed.
    inline bool accept(bool (*aClassifier) (char16_t))
    {
      return peek(aClassifier) && advance();
    }

    // Tests the current character against aSymbol without consuming it.
    // Always false at the end of the token.
    inline bool peek(char16_t aSymbol)
    {
      return !atEnd() && *mCurChar == aSymbol;
    }

    // Runs aClassifier on the current character without consuming it.
    // Always false at the end of the token.
    inline bool peek(bool (*aClassifier) (char16_t))
    {
      return !atEnd() && aClassifier(*mCurChar);
    }

    // Appends the current character to mCurValue and moves mCurChar one step
    // forward. Does nothing and returns false at the end of the token.
    inline bool advance()
    {
      const bool hasChar = !atEnd();
      if (hasChar) {
        mCurValue.Append(*mCurChar);
        ++mCurChar;
      }
      return hasChar;
    }

    // Empties mCurValue.
    inline void resetCurValue()
    {
      mCurValue.Truncate();
    }

    bool atEndOfPath();
    bool atValidPathChar();

    void resetCurChar(const nsAString& aToken);

    void logWarningErrorToConsole(uint32_t aSeverityFlag,
                                  const char* aProperty,
                                  const char16_t* aParams[],
                                  uint32_t aParamsLength);

/**
 * While parsing, the parser relies on the helper members declared below,
 * which are used and reset as parsing progresses. The following example
 * walks through their roles.
 *
 * The tokenizer separates the input into an array of string-arrays, which is
 * stored in mTokens, for example:
 *   mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ]
 *
 * mCurDir always holds the array of strings that is currently processed.
 * In our example:
 *   mCurDir = [ script-src, http://www.example.com, 'self' ]
 *
 * The strings of that array are processed/consumed one at a time. mCurToken
 * is set to the string currently being processed; for the first string that
 * would be:
 *   mCurToken = script-src
 * which allows simple string comparisons to decide whether mCurToken is a
 * valid directive.
 *
 * The parser then consumes the next string of the array, resetting:
 *   mCurToken = "http://www.example.com"
 *                ^                     ^
 *                mCurChar              mEndChar (points *after* the 'm')
 *   mCurValue = ""
 *
 * After the first call to advance(), the helpers hold the following values:
 *   mCurToken = "http://www.example.com"
 *                 ^                    ^
 *                 mCurChar             mEndChar (points *after* the 'm')
 *   mCurValue = "h"
 *
 * This continues until all strings of one directive are consumed; then
 * mCurDir is reset to hold the next array of strings and the process starts
 * all over.
 */

    const char16_t*    mCurChar;
    const char16_t*    mEndChar;
    nsString           mCurValue;
    nsString           mCurToken;
    nsTArray<nsString> mCurDir;

    // Cached state used to ignore 'unsafe-inline' when a hash or nonce is
    // specified.
    bool               mHasHashOrNonce; // false as long as no hash or nonce is defined
    // Null if unset. NOTE(review): the original comment reads "null, otherwise
    // invlidate()", presumably meaning the keyword gets invalidated when a
    // hash or nonce is present - confirm against the implementation.
    nsCSPKeywordSrc*   mUnsafeInlineKeywordSrc;

    cspTokens          mTokens;
    nsIURI*            mSelfURI;
    nsCSPPolicy*       mPolicy;
    uint64_t           mInnerWindowID; // used for console reporting
};

#endif /* nsCSPParser_h___ */
Bug 1152551, part 2 - Fix mode lines in dom/. r=jst 2015-05-03 19:32:37 +00:00			`/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
			`/* vim: set ts=8 sts=2 et sw=2 tw=80: */`
Bug 951457 - Create C++ CSP Parser and policy classes, part 0 - parser and utils r=sstamm, r=jst 2014-05-14 18:03:43 +00:00			`/* This Source Code Form is subject to the terms of the Mozilla Public`
			`* License, v. 2.0. If a copy of the MPL was not distributed with this`
			`* file, You can obtain one at http://mozilla.org/MPL/2.0/. */`

			`#ifndef nsCSPParser_h___`
			`#define nsCSPParser_h___`

			`#include "nsCSPUtils.h"`
			`#include "nsIURI.h"`
			`#include "nsString.h"`

			`/**`
			`* How does the parsing work?`
			`*`
			`* We generate tokens by splitting the policy-string by whitespace and semicolon.`
			`* Interally the tokens are represented as an array of string-arrays:`
			`*`
			`* [`
			`* [ name, src, src, src, ... ],`
			`* [ name, src, src, src, ... ],`
			`* [ name, src, src, src, ... ]`
			`* ]`
			`*`
			`* for example:`
			`* [`
			`* [ img-src, http://www.example.com, http:www.test.com ],`
			`* [ default-src, 'self'],`
			`* [ script-src, 'unsafe-eval', 'unsafe-inline' ],`
			`* ]`
			`*`
			`* The first element of each array has to be a valid directive-name, otherwise we can`
			`* ignore the remaining elements of the array. Also, if the`
			`* directive already exists in the current policy, we can ignore`
			`* the remaining elements of that array. (http://www.w3.org/TR/CSP/#parsing)`
			`*/`

			`typedef nsTArray< nsTArray<nsString> > cspTokens;`

			`class nsCSPTokenizer {`

			`public:`
			`static void tokenizeCSPPolicy(const nsAString &aPolicyString, cspTokens& outTokens);`

			`private:`
			`nsCSPTokenizer(const char16_t* aStart, const char16_t* aEnd);`
			`~nsCSPTokenizer();`

			`inline bool atEnd()`
			`{`
			`return mCurChar >= mEndChar;`
			`}`

			`inline void skipWhiteSpace()`
			`{`
			`while (mCurChar < mEndChar && *mCurChar == ' ') {`
			`mCurToken.Append(*mCurChar++);`
			`}`
			`mCurToken.Truncate();`
			`}`

			`inline void skipWhiteSpaceAndSemicolon()`
			`{`
			`while (mCurChar < mEndChar && (mCurChar == ' ' \|\| mCurChar == ';')) {`
			`mCurToken.Append(*mCurChar++);`
			`}`
			`mCurToken.Truncate();`
			`}`

			`inline bool accept(char16_t aChar)`
			`{`
			`NS_ASSERTION(mCurChar < mEndChar, "Trying to dereference mEndChar");`
			`if (*mCurChar == aChar) {`
			`mCurToken.Append(*mCurChar++);`
			`return true;`
			`}`
			`return false;`
			`}`

			`void generateNextToken();`
			`void generateTokens(cspTokens& outTokens);`

			`const char16_t* mCurChar;`
			`const char16_t* mEndChar;`
			`nsString mCurToken;`
			`};`


			`class nsCSPParser {`

			`public:`
			`/**`
			`* The CSP parser only has one publicly accessible function, which is parseContentSecurityPolicy.`
			`* Internally the input string is separated into string tokens and policy() is called, which starts`
			`* parsing the policy. The parser calls one function after the other according the the source-list`
			`* from http://www.w3.org/TR/CSP11/#source-list. E.g., the parser can only call port() after the parser`
			`* has already processed any possible host in host(), similar to a finite state machine.`
			`*/`
			`static nsCSPPolicy* parseContentSecurityPolicy(const nsAString &aPolicyString,`
			`nsIURI *aSelfURI,`
			`bool aReportOnly,`
			`uint64_t aInnerWindowID);`

			`private:`
			`nsCSPParser(cspTokens& aTokens,`
			`nsIURI* aSelfURI,`
			`uint64_t aInnerWindowID);`
			`~nsCSPParser();`


			`// Parsing the CSP using the source-list from http://www.w3.org/TR/CSP11/#source-list`
			`nsCSPPolicy* policy();`
			`void directive();`
			`nsCSPDirective* directiveName();`
			`void directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs);`
Bug 965727 - Implement referrer directive for CSP. (r=jst,ckerschb) 2014-12-17 19:14:04 +00:00			`void referrerDirectiveValue();`
Bug 951457 - Create C++ CSP Parser and policy classes, part 0 - parser and utils r=sstamm, r=jst 2014-05-14 18:03:43 +00:00			`void sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs);`
			`nsCSPBaseSrc* sourceExpression();`
			`nsCSPSchemeSrc* schemeSource();`
			`nsCSPHostSrc* hostSource();`
			`nsCSPBaseSrc* keywordSource();`
			`nsCSPNonceSrc* nonceSource();`
			`nsCSPHashSrc* hashSource();`
			`nsCSPHostSrc* appHost(); // helper function to support app specific hosts`
			`nsCSPHostSrc* host();`
			`bool hostChar();`
			`bool schemeChar();`
			`bool port();`
			`bool path(nsCSPHostSrc* aCspHost);`

Backed out 4 changesets (bug 671389) for frequent B2G debug test_tcpsocket_client_and_server_basics.html crashes. Backed out changeset b782435e5640 (bug 671389) Backed out changeset 0f8d62109bfe (bug 671389) Backed out changeset 8d6021f66c49 (bug 671389) Backed out changeset cd3e227df9dc (bug 671389) 2015-02-05 21:48:18 +00:00			`bool subHost(); // helper function to parse subDomains`
			`bool atValidUnreservedChar(); // helper function to parse unreserved`
			`bool atValidSubDelimChar(); // helper function to parse sub-delims`
			`bool atValidPctEncodedChar(); // helper function to parse pct-encoded`
			`bool subPath(nsCSPHostSrc* aCspHost); // helper function to parse paths`
			`void reportURIList(nsTArray<nsCSPBaseSrc*>& outSrcs); // helper function to parse report-uris`
			`void percentDecodeStr(const nsAString& aEncStr, // helper function to percent-decode`
Bug 1094067 - CSP: Parser should follow path production from RFC 3986 (r=sstamm) 2014-11-14 23:05:36 +00:00			`nsAString& outDecStr);`
Bug 951457 - Create C++ CSP Parser and policy classes, part 0 - parser and utils r=sstamm, r=jst 2014-05-14 18:03:43 +00:00
			`inline bool atEnd()`
			`{`
			`return mCurChar >= mEndChar;`
			`}`

			`inline bool accept(char16_t aSymbol)`
			`{`
			`if (atEnd()) { return false; }`
			`return (*mCurChar == aSymbol) && advance();`
			`}`

			`inline bool accept(bool (*aClassifier) (char16_t))`
			`{`
			`if (atEnd()) { return false; }`
			`return (aClassifier(*mCurChar)) && advance();`
			`}`

			`inline bool peek(char16_t aSymbol)`
			`{`
			`if (atEnd()) { return false; }`
			`return *mCurChar == aSymbol;`
			`}`

			`inline bool peek(bool (*aClassifier) (char16_t))`
			`{`
			`if (atEnd()) { return false; }`
			`return aClassifier(*mCurChar);`
			`}`

			`inline bool advance()`
			`{`
			`if (atEnd()) { return false; }`
			`mCurValue.Append(*mCurChar++);`
			`return true;`
			`}`

			`inline void resetCurValue()`
			`{`
			`mCurValue.Truncate();`
			`}`

Bug 808292 - CSP: Implement path-level host-source matching, parser updates (r=grobinson,sstamm) 2014-08-12 20:06:19 +00:00			`bool atEndOfPath();`
			`bool atValidPathChar();`

Bug 951457 - Create C++ CSP Parser and policy classes, part 0 - parser and utils r=sstamm, r=jst 2014-05-14 18:03:43 +00:00			`void resetCurChar(const nsAString& aToken);`

			`void logWarningErrorToConsole(uint32_t aSeverityFlag,`
			`const char* aProperty,`
			`const char16_t* aParams[],`
			`uint32_t aParamsLength);`

			`/**`
			`* When parsing the policy, the parser internally uses the following helper`
			`* variables/members which are used/reset during parsing. The following`
			`* example explains how they are used.`
			`* The tokenizer separats all input into arrays of arrays of strings, which`
			`* are stored in mTokens, for example:`
			`* mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ]`
			`*`
			`* When parsing starts, mCurdir always holds the currently processed array of strings.`
			`* In our example:`
			`* mCurDir = [ script-src, http://www.example.com, 'self' ]`
			`*`
			`* During parsing, we process/consume one string at a time of that array.`
			`* We set mCurToken to the string we are currently processing; in the first case`
			`* that would be:`
			`* mCurToken = script-src`
			`* which allows to do simple string comparisons to see if mCurToken is a valid directive.`
			`*`
			`* Continuing parsing, the parser consumes the next string of that array, resetting:`
			`* mCurToken = "http://www.example.com"`
			`* ^ ^`
			`* mCurChar mEndChar (points after the 'm')`
			`* mCurValue = ""`
			`*`
			`* After calling advance() the first time, helpers would hold the following values:`
			`* mCurToken = "http://www.example.com"`
			`* ^ ^`
			`* mCurChar mEndChar (points after the 'm')`
			`* mCurValue = "h"`
			`*`
			`* We continue parsing till all strings of one directive are consumed, then we reset`
			`* mCurDir to hold the next array of strings and start the process all over.`
			`*/`

			`const char16_t* mCurChar;`
			`const char16_t* mEndChar;`
			`nsString mCurValue;`
			`nsString mCurToken;`
			`nsTArray<nsString> mCurDir;`

Bug 1004703 - ignore 'unsafe-inline' if nonce- or hash-source specified (r=sstamm) --HG-- extra : rebase_source : 119d3903ba156300f49f097efe2d18bbf8c8a6b5 2015-04-07 16:06:05 +00:00			`// cache variables to ignore unsafe-inline if hash or nonce is specified`
			`bool mHasHashOrNonce; // false, if no hash or nonce is defined`
			`nsCSPKeywordSrc* mUnsafeInlineKeywordSrc; // null, otherwise invlidate()`

Bug 951457 - Create C++ CSP Parser and policy classes, part 0 - parser and utils r=sstamm, r=jst 2014-05-14 18:03:43 +00:00			`cspTokens mTokens;`
			`nsIURI* mSelfURI;`
			`nsCSPPolicy* mPolicy;`
			`uint64_t mInnerWindowID; // used for console reporting`
			`};`

			`#endif /* nsCSPParser_h___ */`