tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

CanRunScriptChecker.cpp (20387B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /**
      6 * This checker implements the "can run script" analysis.  The idea is to detect
      7 * functions that can run script that are being passed reference-counted
      8 * arguments (including "this") whose refcount might go to zero as a result of
      9 * the script running.  We want to prevent that.
     10 *
     11 * The approach is to attempt to enforce the following invariants on the call
     12 * graph:
     13 *
     14 * 1) Any caller of a MOZ_CAN_RUN_SCRIPT function is itself MOZ_CAN_RUN_SCRIPT.
     15 * 2) If a virtual MOZ_CAN_RUN_SCRIPT method overrides a base class method,
     16 *    that base class method is also MOZ_CAN_RUN_SCRIPT.
     17 *
     18 * Invariant 2 ensures that we don't accidentally call a MOZ_CAN_RUN_SCRIPT
     19 * function via a base-class virtual call.  Invariant 1 ensures that
     20 * the property of being able to run script propagates up the callstack.  There
     21 * is an opt-out for invariant 1: A function (declaration _or_ implementation)
     22 * can be decorated with MOZ_CAN_RUN_SCRIPT_BOUNDARY to indicate that we do not
     23 * require it or any of its callers to be MOZ_CAN_RUN_SCRIPT even if it calls
     24 * MOZ_CAN_RUN_SCRIPT functions.
     25 *
     26 * There are two known holes in invariant 1, apart from the
     27 * MOZ_CAN_RUN_SCRIPT_BOUNDARY opt-out:
     28 *
     29 *  - Functions called via function pointers can be MOZ_CAN_RUN_SCRIPT even if
     30 *    their caller is not, because we have no way to determine from the function
     31 *    pointer what function is being called.
     32 *  - MOZ_CAN_RUN_SCRIPT destructors can happen in functions that are not
     33 *    MOZ_CAN_RUN_SCRIPT.
     34 *    https://bugzilla.mozilla.org/show_bug.cgi?id=1535523 tracks this.
     35 *
     36 * Given those invariants we then require that when calling a MOZ_CAN_RUN_SCRIPT
     37 * function all refcounted arguments (including "this") satisfy one of these
     38 * conditions:
     39 *  a) The argument is held via a strong pointer on the stack.
     40 *  b) The argument is a const strong pointer member of "this".  We know "this"
     41 *     is being kept alive, and a const strong pointer member can't drop its ref
     42 *     until "this" dies.
     43 *  c) The argument is an argument of the caller (and hence held by a strong
     44 *     pointer somewhere higher up the callstack).
     45 *  d) The argument is explicitly annotated with MOZ_KnownLive, which indicates
     46 *     that something is guaranteed to keep it alive (e.g. it's rooted via a JS
     47 *     reflector).
     48 *  e) The argument is constexpr and therefore cannot disappear.
     49 */
     50 
     51 #include "CanRunScriptChecker.h"
     52 #include "CustomMatchers.h"
     53 #include "clang/Lex/Lexer.h"
     54 
     55 void CanRunScriptChecker::registerMatchers(MatchFinder *AstMatcher) {
         // Builds the "known live" expression matchers below and registers one
         // matcher on AstMatcher.  That matcher fires on every method call, call
         // and construct expression and binds it as "callExpr" or "constructExpr".
         // It optionally binds an argument (or the implicit object argument) that
         // involves a refcounted type but is not known live as "invalidArg", and
         // optionally the enclosing function as "nonCanRunScriptParentFunction".
         // check() consumes these bindings.

         // Any type whose declaration is a refcounted C++ record.
     56  auto Refcounted = qualType(hasDeclaration(cxxRecordDecl(isRefCounted())));
         // A reference to a variable with automatic storage duration whose type is
         // a smart pointer to a refcounted type.
     57  auto StackSmartPtr = ignoreTrivials(declRefExpr(to(varDecl(
     58      hasAutomaticStorageDuration(), hasType(isSmartPtrToRefCounted())))));
         // A const-qualified smart pointer member accessed directly on "this".
     59  auto ConstMemberOfThisSmartPtr =
     60      memberExpr(hasType(isSmartPtrToRefCounted()), hasType(isConstQualified()),
     61                 hasObjectExpression(cxxThisExpr()));
     62  // A smartptr can be known-live for three reasons:
     63  // 1) It's declared on the stack.
     64  // 2) It's a const member of "this".  We know "this" is alive (recursively)
     65  //    and const members can't change their value hence can't drop their
     66  //    reference until "this" gets destroyed.
     67  // 3) It's an immediate temporary being constructed at the point where the
     68  //    call is happening.
     69  auto KnownLiveSmartPtr = anyOf(
     70      StackSmartPtr, ConstMemberOfThisSmartPtr,
     71      ignoreTrivials(cxxConstructExpr(hasType(isSmartPtrToRefCounted()))));
     72 
         // A call whose callee is a function named MOZ_KnownLive.
     73  auto MozKnownLiveCall =
     74      ignoreTrivials(callExpr(callee(functionDecl(hasName("MOZ_KnownLive")))));
     75 
     76  // Params of the calling function are presumed live, because it itself should
     77  // be MOZ_CAN_RUN_SCRIPT.  Note that this is subject to
     78  // https://bugzilla.mozilla.org/show_bug.cgi?id=1537656 at the moment.
     79  auto KnownLiveParam = anyOf(
     80      // "this" is OK
     81      cxxThisExpr(),
     82      // A parameter of the calling function is OK.
     83      declRefExpr(to(parmVarDecl())));
     84 
         // A member carrying the known-live annotation that is accessed on "this",
         // on a parameter, or on a variable with automatic storage duration.
     85  auto KnownLiveMemberOfParam =
     86      memberExpr(hasKnownLiveAnnotation(),
     87                 hasObjectExpression(anyOf(
     88                     ignoreTrivials(KnownLiveParam),
     89                     declRefExpr(to(varDecl(hasAutomaticStorageDuration()))))));
     90 
     91  // A matcher that matches various things that are known to be live directly,
     92  // without making any assumptions about operators.
     93  auto KnownLiveBaseExceptRef = anyOf(
     94      // Things that are known to be a stack or immutable refptr.
     95      KnownLiveSmartPtr,
     96      // MOZ_KnownLive() calls.
     97      MozKnownLiveCall,
     98      // Params of the caller function.
     99      KnownLiveParam,
    100      // Members of the params that are marked as MOZ_KNOWN_LIVE.
    101      KnownLiveMemberOfParam,
    102      // Constexpr things.
    103      declRefExpr(to(varDecl(isConstexpr()))));
    104 
    105  // A local reference which is initialized with a known-live thing is OK.
    106  // FIXME: This does not allow nested references.
         // The initializer may also be a conditional operator; its false and true
         // arms are checked with the same matcher, which is spelled out twice
         // below.
    107  auto RefToKnownLivePtr = ignoreTrivials(declRefExpr(to(varDecl(
    108      hasAutomaticStorageDuration(), hasType(referenceType()),
    109      hasInitializer(anyOf(
    110          KnownLiveSmartPtr, KnownLiveParam, KnownLiveMemberOfParam,
    111          conditionalOperator(
    112              hasFalseExpression(ignoreTrivials(anyOf(
    113                  KnownLiveSmartPtr, KnownLiveParam, KnownLiveMemberOfParam,
    114                  declRefExpr(to(varDecl(isConstexpr()))),
    115                  // E.g., for RefPtr<T>::operator*()
    116                  cxxOperatorCallExpr(
    117                      hasOverloadedOperatorName("*"),
    118                      hasAnyArgument(
    119                          anyOf(KnownLiveBaseExceptRef,
    120                                ignoreTrivials(KnownLiveMemberOfParam))),
    121                      argumentCountIs(1)),
    122                  // E.g., for *T
    123                  unaryOperator(unaryDereferenceOperator(),
    124                                hasUnaryOperand(
    125                                    ignoreTrivials(KnownLiveBaseExceptRef)))))),
    126              hasTrueExpression(ignoreTrivials(anyOf(
    127                  KnownLiveSmartPtr, KnownLiveParam, KnownLiveMemberOfParam,
    128                  declRefExpr(to(varDecl(isConstexpr()))),
    129                  // E.g., for RefPtr<T>::operator*()
    130                  cxxOperatorCallExpr(
    131                      hasOverloadedOperatorName("*"),
    132                      hasAnyArgument(
    133                          anyOf(KnownLiveBaseExceptRef,
    134                                ignoreTrivials(KnownLiveMemberOfParam))),
    135                      argumentCountIs(1)),
    136                  // E.g., for *T
    137                  unaryOperator(unaryDereferenceOperator(),
    138                                hasUnaryOperand(ignoreTrivials(
    139                                    KnownLiveBaseExceptRef)))))))))))));
    140 
    141  // Everything matched by KnownLiveBaseExceptRef, plus local references that
    142  // were initialized with known-live things (RefToKnownLivePtr).
    143  auto KnownLiveBase =
    144      anyOf(KnownLiveBaseExceptRef,
    145            // Smart pointer refs initialized with known live smart ptrs.
    146            RefToKnownLivePtr);
    147 
    148  // A matcher that matches various known-live things that don't involve
    149  // non-unary operators.
    150  auto KnownLiveSimple = anyOf(
    151      // Things that are just known live.
    152      KnownLiveBase,
    153      // Method calls on live things that are smart ptrs.  Note that we don't
    154      // want to allow general method calls on live things, because those can
    155      // return non-live objects (e.g. consider "live_pointer->foo()" as an
    156      // example).  For purposes of this analysis we are assuming the method
    157      // calls on smart ptrs all just return the pointer inside.
    158      cxxMemberCallExpr(
    159          on(anyOf(allOf(hasType(isSmartPtrToRefCounted()), KnownLiveBase),
    160                   // Allow it if calling a member method which is marked as
    161                   // MOZ_KNOWN_LIVE
    162                   KnownLiveMemberOfParam))),
    163      // operator* or operator-> on a thing that is already known to be live.
    164      cxxOperatorCallExpr(
    165          hasAnyOverloadedOperatorName("*", "->"),
    166          hasAnyArgument(
    167              anyOf(KnownLiveBase, ignoreTrivials(KnownLiveMemberOfParam))),
    168          argumentCountIs(1)),
    169      // A dereference on a thing that is known to be live.  This is _not_
    170      // caught by the "operator* or operator->" clause above, because
    171      // cxxOperatorCallExpr() only catches cases when a class defines
    172      // operator*.  The default (built-in) operator* matches unaryOperator()
    173      // instead.
    174      unaryOperator(
    175          unaryDereferenceOperator(),
    176          hasUnaryOperand(
    177              // If we're doing *someArg, the argument of the dereference is an
    178              // ImplicitCastExpr LValueToRValue which has the DeclRefExpr as an
    179              // argument.  We could try to match that explicitly with a custom
    180              // matcher (none of the built-in matchers seem to match on the
    181              // thing being cast for an implicitCastExpr), but it's simpler to
    182              // just use ignoreTrivials to strip off the cast.
    183              ignoreTrivials(KnownLiveBase))),
    184      // Taking a pointer to a live reference.  We explicitly want to exclude
    185      // things that are not of type reference-to-refcounted or type refcounted,
    186      // because if someone takes a pointer to a pointer to refcounted or a
    187      // pointer to a smart ptr and passes those in to a callee that definitely
    188      // does not guarantee liveness; in fact the callee could modify those
    189      // things!  In practice they would be the wrong type anyway, though, so
    190      // it's hard to add a test for this.
    191      unaryOperator(hasOperatorName("&"),
    192                    hasUnaryOperand(allOf(anyOf(hasType(references(Refcounted)),
    193                                                hasType(Refcounted)),
    194                                          ignoreTrivials(KnownLiveBase)))));
    195 
         // The complete "known live" matcher used to vet arguments.
    196  auto KnownLive = anyOf(
    197      // Anything above, of course.
    198      KnownLiveSimple,
    199      // Conditional operators where both arms are live.
    200      conditionalOperator(hasFalseExpression(ignoreTrivials(KnownLiveSimple)),
    201                          hasTrueExpression(ignoreTrivials(KnownLiveSimple)))
    202      // We're not handling cases like a dereference of a conditional operator,
    203      // mostly because handling a dereference in general is so ugly.  I
    204      // _really_ wish I could just write a recursive matcher here easily.
    205  );
    206 
         // An argument that involves a refcounted type but is not known live.  The
         // offending expression is bound as "invalidArg".
    207  auto InvalidArg = ignoreTrivialsConditional(
    208      // We want to consider things if there is anything refcounted involved,
    209      // including in any of the trivials that we otherwise strip off.
    210      anyOf(hasType(Refcounted), hasType(pointsTo(Refcounted)),
    211            hasType(references(Refcounted)), hasType(isSmartPtrToRefCounted())),
    212      // We want to find any expression,
    213      expr(
    214          // which is not known live,
    215          unless(KnownLive),
    216          // and which is not a default arg with value nullptr, since those are
    217          // always safe,
    218          unless(cxxDefaultArgExpr(isNullDefaultArg())),
    219          // and which is not a literal nullptr,
    220          unless(cxxNullPtrLiteralExpr()), expr().bind("invalidArg")));
    221 
    222  // A matcher which will mark the first invalid argument it finds invalid, but
    223  // will always match, even if it finds no invalid arguments, so it doesn't
    224  // preclude other matchers from running and maybe finding invalid args.
    225  auto OptionalInvalidExplicitArg = anyOf(
    226      // We want to find any argument which is invalid.
    227      hasAnyArgument(InvalidArg),
    228 
    229      // This makes this matcher optional.
    230      anything());
    231 
    232  // Please note that the hasCanRunScriptAnnotation() matchers are not present
    233  // directly in the cxxMemberCallExpr, callExpr and constructExpr matchers
    234  // because we check that the corresponding functions can run script later in
    235  // the checker code.
    236  AstMatcher->addMatcher(
    237      expr(
    238          anyOf(
    239              // We want to match a method call expression,
    240              cxxMemberCallExpr(
    241                  // which optionally has an invalid arg,
    242                  OptionalInvalidExplicitArg,
    243                  // or which optionally has an invalid this argument,
    244                  anyOf(on(InvalidArg), anything()), expr().bind("callExpr")),
    245              // or a regular call expression,
    246              callExpr(
    247                  // which optionally has an invalid arg.
    248                  OptionalInvalidExplicitArg, expr().bind("callExpr")),
    249              // or a construct expression,
    250              cxxConstructExpr(
    251                  // which optionally has an invalid arg.
    252                  OptionalInvalidExplicitArg, expr().bind("constructExpr"))),
    253 
    254          anyOf(
    255              // We want to match the parent function.
    256              forFunction(functionDecl().bind("nonCanRunScriptParentFunction")),
    257 
    258              // ... optionally.
    259              anything())),
    260      this);
    261 }
    262 
    263 void CanRunScriptChecker::onStartOfTranslationUnit() {
         // Reset the per-translation-unit state: clearing the flag makes the next
         // check() call rebuild the can-run-script function set from scratch.
    264  IsFuncSetBuilt = false;
    265  CanRunScriptFuncs.clear();
    266 }
    267 
    268 namespace {
    269 /// This class is a callback used internally to match function declarations with
    270 /// the MOZ_CAN_RUN_SCRIPT annotation, adding these functions to the
    271 /// can-run-script function set and making sure the functions they override (if
    272 /// any) also have the annotation.
        /// It also handles matches bound as "lambda": a lambda's call operator is
        /// added to the set only if that call operator carries the annotation.
    273 class FuncSetCallback : public MatchFinder::MatchCallback {
    274 public:
         /// Keeps a reference to the checker (used to emit diagnostics) and to the
         /// set that run() fills.
    275  FuncSetCallback(CanRunScriptChecker &Checker,
    276                  std::unordered_set<const FunctionDecl *> &FuncSet)
    277      : CanRunScriptFuncs(FuncSet), Checker(Checker) {}
    278 
         /// Called once per match; records the matched function in the set.
    279  void run(const MatchFinder::MatchResult &Result) override;
    280 
    281 private:
    282  /// Emits an error (and a note) for each method overridden by the given
         /// method that does not have the MOZ_CAN_RUN_SCRIPT annotation.
    283  void checkOverriddenMethods(const CXXMethodDecl *Method);
    284 
         // The set being built; owned by the caller.
    285  std::unordered_set<const FunctionDecl *> &CanRunScriptFuncs;
         // The checker through which diagnostics are reported.
    286  CanRunScriptChecker &Checker;
    287 };
    288 
    289 void FuncSetCallback::run(const MatchFinder::MatchResult &Result) {
    290  const FunctionDecl *Func;
         // A "lambda" binding means the match came from the lambda matcher, which
         // is not filtered by annotation, so the attribute is checked here.
    291  if (auto *Lambda = Result.Nodes.getNodeAs<LambdaExpr>("lambda")) {
    292    Func = Lambda->getCallOperator();
    293    if (!Func || !hasCustomAttribute<moz_can_run_script>(Func))
    294      return;
    295  } else {
           // Otherwise the match is an annotated function declaration.
    296    Func = Result.Nodes.getNodeAs<FunctionDecl>("canRunScriptFunction");
    297 
    298    const char *ErrorAttrInDefinition =
    299        "MOZ_CAN_RUN_SCRIPT must be put in front "
    300        "of the declaration, not the definition";
    301    const char *NoteAttrInDefinition = "The first declaration exists here";
           // An annotated redeclaration is an error when the first declaration
           // lacks the annotation, unless the redeclaration carries the
           // moz_can_run_script_for_definition attribute.
    302    if (!Func->isFirstDecl() &&
    303        !hasCustomAttribute<moz_can_run_script_for_definition>(Func)) {
    304      const FunctionDecl *FirstDecl = Func->getFirstDecl();
    305      if (!hasCustomAttribute<moz_can_run_script>(FirstDecl)) {
    306        Checker.diag(Func->getLocation(), ErrorAttrInDefinition,
    307                     DiagnosticIDs::Error);
    308        Checker.diag(FirstDecl->getLocation(), NoteAttrInDefinition,
    309                     DiagnosticIDs::Note);
    310      }
    311    }
    312  }
    313 
         // The function is recorded even if the diagnostic above was emitted.
    314  CanRunScriptFuncs.insert(Func);
    315 
    316  // If this is a method, we check the methods it overrides.
    317  if (auto *Method = dyn_cast<CXXMethodDecl>(Func)) {
    318    checkOverriddenMethods(Method);
    319  }
    320 }
    321 
    322 void FuncSetCallback::checkOverriddenMethods(const CXXMethodDecl *Method) {
         // Report every method in overridden_methods() that lacks the annotation:
         // an error at the overriding method and a note at the overridden one.
    323  for (auto OverriddenMethod : Method->overridden_methods()) {
    324    if (!hasCustomAttribute<moz_can_run_script>(OverriddenMethod)) {
    325      const char *ErrorNonCanRunScriptOverridden =
    326          "functions marked as MOZ_CAN_RUN_SCRIPT cannot override functions "
    327          "that are not marked MOZ_CAN_RUN_SCRIPT";
    328      const char *NoteNonCanRunScriptOverridden =
    329          "overridden function declared here";
    330 
    331      Checker.diag(Method->getLocation(), ErrorNonCanRunScriptOverridden,
    332                   DiagnosticIDs::Error);
    333      Checker.diag(OverriddenMethod->getLocation(),
    334                   NoteNonCanRunScriptOverridden, DiagnosticIDs::Note);
    335    }
    336  }
    337 }
    338 } // namespace
    339 
    340 void CanRunScriptChecker::buildFuncSet(ASTContext *Context) {
         // Fills CanRunScriptFuncs by running a separate, local match finder over
         // the whole AST of the given context.
    341  // We create a match finder.
    342  MatchFinder Finder;
    343  // We create the callback which will be called when we find a function with
    344  // a MOZ_CAN_RUN_SCRIPT annotation.
    345  FuncSetCallback Callback(*this, CanRunScriptFuncs);
    346  // We add the matcher to the finder, linking it to our callback.
    347  Finder.addMatcher(
    348      functionDecl(hasCanRunScriptAnnotation()).bind("canRunScriptFunction"),
    349      &Callback);
         // Every lambda expression is matched as well; the callback decides whether
         // its call operator is annotated.
    350  Finder.addMatcher(lambdaExpr().bind("lambda"), &Callback);
    351  // We start the analysis, given the ASTContext our main checker is in.
    352  Finder.matchAST(*Context);
    353 }
    354 
    355 void CanRunScriptChecker::check(const MatchFinder::MatchResult &Result) {
         // Handles one match of the matcher added in registerMatchers().  Nothing
         // is reported unless the matched call's direct callee, or the matched
         // construct expression's constructor, is in CanRunScriptFuncs.  For such
         // a call an error is emitted for a bound "invalidArg", and a further error
         // plus a note if the enclosing function is neither in CanRunScriptFuncs
         // nor annotated with moz_can_run_script_boundary.
    356 
    357  // If the set of functions which can run script is not yet built, then build
    358  // it.
    359  if (!IsFuncSetBuilt) {
    360    buildFuncSet(Result.Context);
    361    IsFuncSetBuilt = true;
    362  }
    363 
    364  const char *ErrorInvalidArg =
    365      "arguments must all be strong refs or caller's parameters when calling a "
    366      "function marked as MOZ_CAN_RUN_SCRIPT (including the implicit object "
    367      "argument).  '%0' is neither.";
    368 
    369  const char *ErrorNonCanRunScriptParent =
    370      "functions marked as MOZ_CAN_RUN_SCRIPT can only be called from "
    371      "functions also marked as MOZ_CAN_RUN_SCRIPT";
    372  const char *NoteNonCanRunScriptParent = "caller function declared here";
    373 
         // If the bound node is a default-argument expression, report on the
         // expression it wraps instead of on the wrapper itself.
    374  const Expr *InvalidArg;
    375  if (const CXXDefaultArgExpr *defaultArg =
    376          Result.Nodes.getNodeAs<CXXDefaultArgExpr>("invalidArg")) {
    377    InvalidArg = defaultArg->getExpr();
    378  } else {
    379    InvalidArg = Result.Nodes.getNodeAs<Expr>("invalidArg");
    380  }
    381 
    382  const CallExpr *Call = Result.Nodes.getNodeAs<CallExpr>("callExpr");
    383  // If we don't find the FunctionDecl linked to this call or if it's not marked
    384  // as can-run-script, consider that we didn't find a match.
    385  if (Call && (!Call->getDirectCallee() ||
    386               !CanRunScriptFuncs.count(Call->getDirectCallee()))) {
    387    Call = nullptr;
    388  }
    389 
    390  const CXXConstructExpr *Construct =
    391      Result.Nodes.getNodeAs<CXXConstructExpr>("constructExpr");
    392 
    393  // If we don't find the CXXConstructorDecl linked to this construct expression
    394  // or if it's not marked as can-run-script, consider that we didn't find a
    395  // match.
    396  if (Construct && (!Construct->getConstructor() ||
    397                    !CanRunScriptFuncs.count(Construct->getConstructor()))) {
    398    Construct = nullptr;
    399  }
    400 
    401  const FunctionDecl *ParentFunction =
    402      Result.Nodes.getNodeAs<FunctionDecl>("nonCanRunScriptParentFunction");
    403  // If the parent function can run script, consider that we didn't find a match
    404  // because we only care about parent functions which can't run script.
    405  //
    406  // In addition, if the parent function is annotated as a
    407  // CAN_RUN_SCRIPT_BOUNDARY, we don't want to complain about it calling a
    408  // CAN_RUN_SCRIPT function. This is a mechanism to opt out of the infectious
    409  // nature of CAN_RUN_SCRIPT which is necessary in some tricky code like
    410  // Bindings.
    411  if (ParentFunction &&
    412      (CanRunScriptFuncs.count(ParentFunction) ||
    413       hasCustomAttribute<moz_can_run_script_boundary>(ParentFunction))) {
    414    ParentFunction = nullptr;
    415  }
    416 
    417  // Get the call range from either the CallExpr or the ConstructExpr.
    418  SourceRange CallRange;
    419  if (Call) {
    420    CallRange = Call->getSourceRange();
    421  } else if (Construct) {
    422    CallRange = Construct->getSourceRange();
    423  } else {
    424    // If we have neither a Call nor a Construct, we have nothing to do here.
    425    return;
    426  }
    427 
    428  // If we have an invalid argument in the call, we emit the diagnostic to
    429  // signal it.
    430  if (InvalidArg) {
           // The argument's source text is substituted for '%0' in the message.
    431    const StringRef invalidArgText = Lexer::getSourceText(
    432        CharSourceRange::getTokenRange(InvalidArg->getSourceRange()),
    433        Result.Context->getSourceManager(), Result.Context->getLangOpts());
    434    diag(InvalidArg->getExprLoc(), ErrorInvalidArg, DiagnosticIDs::Error)
    435        << InvalidArg->getSourceRange() << invalidArgText;
    436  }
    437 
    438  // If the parent function is not marked as MOZ_CAN_RUN_SCRIPT, we emit an
    439  // error and a note indicating it.
    440  if (ParentFunction) {
    441    assert(!hasCustomAttribute<moz_can_run_script>(ParentFunction) &&
    442           "Matcher missed something");
    443 
    444    diag(CallRange.getBegin(), ErrorNonCanRunScriptParent, DiagnosticIDs::Error)
    445        << CallRange;
    446 
    447    diag(ParentFunction->getCanonicalDecl()->getLocation(),
    448         NoteNonCanRunScriptParent, DiagnosticIDs::Note);
    449  }
    450 }