tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

handler.h (19713B)


      1 // Copyright 2022 The Chromium Authors.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CONTENT_ANALYSIS_DEMO_HANDLER_H_
      6 #define CONTENT_ANALYSIS_DEMO_HANDLER_H_
      7 
      8 #include <time.h>
      9 
     10 #include <algorithm>
     11 #include <atomic>
     12 #include <chrono>
     13 #include <cstdio>
     14 #include <fstream>
     15 #include <iostream>
     16 #include <optional>
     17 #include <thread>
     18 #include <utility>
     19 #include <regex>
     20 #include <vector>
     21 
     22 #include "content_analysis/sdk/analysis_agent.h"
     23 #include "demo/atomic_output.h"
     24 #include "demo/request_queue.h"
     25 
     26 using RegexArray = std::vector<std::pair<std::string, std::regex>>;
     27 
     28 // An AgentEventHandler that dumps requests information to stdout and blocks
     29 // any requests that have the keyword "block" in their data
     30 class Handler : public content_analysis::sdk::AgentEventHandler {
     31 public:
     32  using Event = content_analysis::sdk::ContentAnalysisEvent;
     33 
     34  Handler(std::vector<unsigned long>&& delays, const std::string& print_data_file_path,
     35          RegexArray&& toBlock = RegexArray(),
     36          RegexArray&& toWarn = RegexArray(),
     37          RegexArray&& toReport = RegexArray()) :
     38      toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)),
     39      delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {}
     40 
     41  const std::vector<unsigned long> delays() { return delays_; }
     42  size_t nextDelayIndex() const { return nextDelayIndex_; }
     43 
     44 protected:
     45  // subclasses can override this
     46  // returns whether the response has been set
     47  virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) {
     48    return false;
     49  }
     50  // subclasses can override this
     51  // returns whether the response has been sent
     52  virtual bool SendCustomResponse(std::unique_ptr<Event>& event) {
     53    return false;
     54  }
     55  // Analyzes one request from Google Chrome and responds back to the browser
     56  // with either an allow or block verdict.
     57  void AnalyzeContent(AtomicCout& aout, std::unique_ptr<Event> event) {
     58    // An event represents one content analysis request and response triggered
     59    // by a user action in Google Chrome.  The agent determines whether the
     60    // user is allowed to perform the action by examining event->GetRequest().
     61    // The verdict, which can be "allow" or "block" is written into
     62    // event->GetResponse().
     63 
     64    DumpEvent(aout.stream(), event.get());
     65 
     66    bool success = true;
     67    std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse;
     68    bool setResponse = SetCustomResponse(aout, event);
     69    if (!setResponse) {
     70      caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK;
     71      if (event->GetRequest().has_text_content()) {
     72        caResponse = DecideCAResponse(
     73            event->GetRequest().text_content(), aout.stream());
     74      } else if (event->GetRequest().has_file_path()) {
     75        // TODO: Fix downloads to store file *first* so we can check contents.
     76        // Until then, just check the file name:
     77        caResponse = DecideCAResponse(
     78            event->GetRequest().file_path(), aout.stream());
     79      } else if (event->GetRequest().has_print_data()) {
     80        // In the case of print request, normally the PDF bytes would be parsed
     81        // for sensitive data violations. To keep this class simple, only the
     82        // URL is checked for the word "block".
     83        caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream());
     84      }
     85    }
     86 
     87    if (!success) {
     88      content_analysis::sdk::UpdateResponse(
     89          event->GetResponse(),
     90          std::string(),
     91          content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE);
     92      aout.stream() << "  Verdict: failed to reach verdict: ";
     93      aout.stream() << event->DebugString() << std::endl;
     94    } else {
     95      aout.stream() << "  Verdict: ";
     96      if (caResponse) {
     97        switch (caResponse.value()) {
     98          case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK:
     99            aout.stream() << "BLOCK";
    100            break;
    101          case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN:
    102            aout.stream() << "WARN";
    103            break;
    104          case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY:
    105            aout.stream() << "REPORT_ONLY";
    106            break;
    107          case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED:
    108            aout.stream() << "ACTION_UNSPECIFIED";
    109            break;
    110          default:
    111            aout.stream() << "<error>";
    112            break;
    113        }
    114        auto rc =
    115          content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value());
    116        if (rc != content_analysis::sdk::ResultCode::OK) {
    117          aout.stream() << " error: "
    118                        << content_analysis::sdk::ResultCodeToString(rc) << std::endl;
    119          aout.stream() << "  " << event->DebugString() << std::endl;
    120        }
    121        aout.stream() << std::endl;
    122      } else {
    123        aout.stream() << "  Verdict: allow" << std::endl;
    124      }
    125      aout.stream() << std::endl;
    126    }
    127    aout.stream() << std::endl;
    128 
    129    // If a delay is specified, wait that much.
    130    size_t nextDelayIndex = nextDelayIndex_.fetch_add(1);
    131    unsigned long delay = delays_[nextDelayIndex % delays_.size()];
    132    if (delay > 0) {
    133      aout.stream() << "Delaying response to " << event->GetRequest().request_token()
    134                    << " for " << delay << "ms" << std::endl<< std::endl;
    135      aout.flush();
    136      std::this_thread::sleep_for(std::chrono::milliseconds(delay));
    137    }
    138 
    139    // Send the response back to Google Chrome.
    140    bool sentCustomResponse = SendCustomResponse(event);
    141    if (!sentCustomResponse) {
    142      auto rc = event->Send();
    143      if (rc != content_analysis::sdk::ResultCode::OK) {
    144        aout.stream() << "[Demo] Error sending response: "
    145                      << content_analysis::sdk::ResultCodeToString(rc)
    146                      << std::endl;
    147        aout.stream() << event->DebugString() << std::endl;
    148      }
    149    }
    150  }
    151 
    152 protected:
    153  void OnBrowserConnected(
    154      const content_analysis::sdk::BrowserInfo& info) override {
    155    AtomicCout aout;
    156    aout.stream() << std::endl << "==========" << std::endl;
    157    aout.stream() << "Browser connected pid=" << info.pid
    158                  << " path=" << info.binary_path << std::endl;
    159  }
    160 
    161  void OnBrowserDisconnected(
    162      const content_analysis::sdk::BrowserInfo& info) override {
    163    AtomicCout aout;
    164    aout.stream() << std::endl << "Browser disconnected pid=" << info.pid << std::endl;
    165    aout.stream() << "==========" << std::endl;
    166  }
    167 
    168  void OnAnalysisRequested(std::unique_ptr<Event> event) override {
    169    // If the agent is capable of analyzing content in the background, the
    170    // events may be handled in background threads.  Having said that, a
    171    // event should not be assumed to be thread safe, that is, it should not
    172    // be accessed by more than one thread concurrently.
    173    //
    174    // In this example code, the event is handled synchronously.
    175    AtomicCout aout;
    176    aout.stream() << std::endl << "----------" << std::endl << std::endl;
    177    AnalyzeContent(aout, std::move(event));
    178  }
    179 
    180  void OnResponseAcknowledged(
    181      const content_analysis::sdk::ContentAnalysisAcknowledgement&
    182          ack) override {
    183    const char* final_action = "<Unknown>";
    184    if (ack.has_final_action()) {
    185      switch (ack.final_action()) {
    186      case content_analysis::sdk::ContentAnalysisAcknowledgement::ACTION_UNSPECIFIED:
    187        final_action = "<Unspecified>";
    188        break;
    189      case content_analysis::sdk::ContentAnalysisAcknowledgement::ALLOW:
    190        final_action = "Allow";
    191        break;
    192      case content_analysis::sdk::ContentAnalysisAcknowledgement::REPORT_ONLY:
    193        final_action = "Report only";
    194        break;
    195      case content_analysis::sdk::ContentAnalysisAcknowledgement::WARN:
    196        final_action = "Warn";
    197        break;
    198      case content_analysis::sdk::ContentAnalysisAcknowledgement::BLOCK:
    199        final_action = "Block";
    200        break;
    201      }
    202    }
    203 
    204    AtomicCout aout;
    205    aout.stream() << "  Ack: " << ack.request_token() << std::endl;
    206    aout.stream() << "  Final action: " << final_action << std::endl;
    207  }
    208  void OnCancelRequests(
    209      const content_analysis::sdk::ContentAnalysisCancelRequests& cancel)
    210      override {
    211    AtomicCout aout;
    212    aout.stream() << "Cancel: " << std::endl;
    213    aout.stream() << "  User action ID: " << cancel.user_action_id() << std::endl;
    214  }
    215 
    216  void OnInternalError(
    217      const char* context,
    218      content_analysis::sdk::ResultCode error) override {
    219    AtomicCout aout;
    220    aout.stream() << std::endl
    221                  << "*ERROR*: context=\"" << context << "\" "
    222                  << content_analysis::sdk::ResultCodeToString(error)
    223                  << std::endl;
    224  }
    225 
    226  void DumpEvent(std::stringstream& stream, Event* event) {
    227    time_t now = time(nullptr);
    228    stream << "Received at: " << ctime(&now);  // Includes \n.
    229    stream << "Received from: pid=" << event->GetBrowserInfo().pid
    230           <<  " path=" << event->GetBrowserInfo().binary_path << std::endl;
    231 
    232    const content_analysis::sdk::ContentAnalysisRequest& request =
    233        event->GetRequest();
    234    std::string connector = "<Unknown>";
    235    if (request.has_analysis_connector()) {
    236      switch (request.analysis_connector()) {
    237        case content_analysis::sdk::FILE_DOWNLOADED:
    238          connector = "download";
    239          break;
    240        case content_analysis::sdk::FILE_ATTACHED:
    241          connector = "attach";
    242          break;
    243        case content_analysis::sdk::BULK_DATA_ENTRY:
    244          connector = "bulk-data-entry";
    245          break;
    246        case content_analysis::sdk::PRINT:
    247          connector = "print";
    248          break;
    249        case content_analysis::sdk::FILE_TRANSFER:
    250          connector = "file-transfer";
    251          break;
    252        default:
    253          break;
    254      }
    255    }
    256    std::string reason;
    257    if (request.has_reason()) {
    258      using content_analysis::sdk::ContentAnalysisRequest;
    259      switch (request.reason()) {
    260        case content_analysis::sdk::ContentAnalysisRequest::UNKNOWN:
    261          reason = "<Unknown>";
    262          break;
    263        case content_analysis::sdk::ContentAnalysisRequest::CLIPBOARD_PASTE:
    264          reason = "CLIPBOARD_PASTE";
    265          break;
    266        case content_analysis::sdk::ContentAnalysisRequest::DRAG_AND_DROP:
    267          reason = "DRAG_AND_DROP";
    268          break;
    269        case content_analysis::sdk::ContentAnalysisRequest::FILE_PICKER_DIALOG:
    270          reason = "FILE_PICKER_DIALOG";
    271          break;
    272        case content_analysis::sdk::ContentAnalysisRequest::PRINT_PREVIEW_PRINT:
    273          reason = "PRINT_PREVIEW_PRINT";
    274          break;
    275        case content_analysis::sdk::ContentAnalysisRequest::SYSTEM_DIALOG_PRINT:
    276          reason = "SYSTEM_DIALOG_PRINT";
    277          break;
    278        case content_analysis::sdk::ContentAnalysisRequest::NORMAL_DOWNLOAD:
    279          reason = "NORMAL_DOWNLOAD";
    280          break;
    281        case content_analysis::sdk::ContentAnalysisRequest::SAVE_AS_DOWNLOAD:
    282          reason = "SAVE_AS_DOWNLOAD";
    283          break;
    284      }
    285    }
    286 
    287    std::string url =
    288        request.has_request_data() && request.request_data().has_url()
    289        ? request.request_data().url() : "<No URL>";
    290 
    291    std::string tab_title =
    292        request.has_request_data() && request.request_data().has_tab_title()
    293        ? request.request_data().tab_title() : "<No tab title>";
    294 
    295    std::string filename =
    296        request.has_request_data() && request.request_data().has_filename()
    297        ? request.request_data().filename() : "<No filename>";
    298 
    299    std::string digest =
    300        request.has_request_data() && request.request_data().has_digest()
    301        ? request.request_data().digest() : "<No digest>";
    302 
    303    std::string file_path =
    304        request.has_file_path()
    305        ? request.file_path() : "None, bulk text entry or print";
    306 
    307    std::string machine_user =
    308        request.has_client_metadata() &&
    309        request.client_metadata().has_browser() &&
    310        request.client_metadata().browser().has_machine_user()
    311      ? request.client_metadata().browser().machine_user() : "<No machine user>";
    312 
    313    std::string email =
    314        request.has_request_data() && request.request_data().has_email()
    315      ? request.request_data().email() : "<No email>";
    316 
    317    time_t t = request.expires_at();
    318    std::string expires_at_str = ctime(&t);
    319    // Returned string includes trailing \n, overwrite with null.
    320    expires_at_str[expires_at_str.size() - 1] = 0;
    321    time_t secs_remaining = t - now;
    322 
    323    std::string user_action_id = request.has_user_action_id()
    324        ? request.user_action_id() : "<No user action id>";
    325 
    326    stream << "Request: " << request.request_token() << std::endl;
    327    stream << "  User action ID: " << user_action_id << std::endl;
    328    stream << "  Expires at: " << expires_at_str << " ("
    329           << secs_remaining << " seconds from now)" << std::endl;
    330    stream << "  Connector: " << connector << std::endl;
    331    if (!reason.empty()) {
    332      stream << "  Reason: " << reason << std::endl;
    333    }
    334    stream << "  URL: " << url << std::endl;
    335    stream << "  Tab title: " << tab_title << std::endl;
    336    stream << "  Filename: " << filename << std::endl;
    337    stream << "  Digest: " << digest << std::endl;
    338    stream << "  Filepath: " << file_path << std::endl;
    339    stream << "  Machine user: " << machine_user << std::endl;
    340    stream << "  Email: " << email << std::endl;
    341 
    342    if (request.has_text_content() && !request.text_content().empty()) {
    343      std::string prefix = "  Pasted data: ";
    344      std::string text_content = request.text_content();
    345 
    346      // Truncate the text past 50 bytes to keep it to a reasonable length in
    347      // the terminal window.
    348      if (text_content.size() > 50) {
    349        prefix = "  Pasted data (truncated): ";
    350        text_content = text_content.substr(0, 50) + "...";
    351      }
    352      stream << prefix
    353             << text_content
    354             << std::endl;
    355      stream << "  Pasted data size (bytes): "
    356             << request.text_content().size()
    357             << std::endl;
    358    }
    359 
    360    if (request.has_print_data() && !print_data_file_path_.empty()) {
    361      if (request.request_data().has_print_metadata() &&
    362          request.request_data().print_metadata().has_printer_name()) {
    363        stream << "  Printer name: "
    364               << request.request_data().print_metadata().printer_name()
    365               << std::endl;
    366      } else {
    367        stream << "  No printer name in request" << std::endl;
    368      }
    369 
    370      stream << "  Print data saved to: " << print_data_file_path_
    371                << std::endl;
    372      using content_analysis::sdk::ContentAnalysisEvent;
    373      auto print_data =
    374          content_analysis::sdk::CreateScopedPrintHandle(event->GetRequest(),
    375                   event->GetBrowserInfo().pid);
    376      std::ofstream file(print_data_file_path_,
    377                         std::ios::out | std::ios::trunc | std::ios::binary);
    378      file.write(print_data->data(), print_data->size());
    379      file.flush();
    380      file.close();
    381    }
    382  }
    383 
    384  bool ReadContentFromFile(const std::string& file_path,
    385                          std::string* content) {
    386    std::ifstream file(file_path,
    387                      std::ios::in | std::ios::binary | std::ios::ate);
    388    if (!file.is_open())
    389      return false;
    390 
    391    // Get file size.  This example does not handle files larger than 1MB.
    392    // Make sure content string can hold the contents of the file.
    393    int size = file.tellg();
    394    if (size > 1024 * 1024)
    395      return false;
    396 
    397    content->resize(size + 1);
    398 
    399    // Read file into string.
    400    file.seekg(0, std::ios::beg);
    401    file.read(&(*content)[0], size);
    402    content->at(size) = 0;
    403    return true;
    404  }
    405 
    406  std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action>
    407  DecideCAResponse(const std::string& content, std::stringstream& stream) {
    408    for (auto& r : toBlock_) {
    409      if (std::regex_search(content, r.second)) {
    410        stream << "'" << content << "' matches BLOCK regex '"
    411                  << r.first << "'" << std::endl;
    412        return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK;
    413      }
    414    }
    415    for (auto& r : toWarn_) {
    416      if (std::regex_search(content, r.second)) {
    417        stream << "'" << content << "' matches WARN regex '"
    418                  << r.first << "'" << std::endl;
    419        return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN;
    420      }
    421    }
    422    for (auto& r : toReport_) {
    423      if (std::regex_search(content, r.second)) {
    424        stream << "'" << content << "' matches REPORT_ONLY regex '"
    425                  << r.first << "'" << std::endl;
    426        return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY;
    427      }
    428    }
    429    stream << "'" << content << "' was ALLOWed\n";
    430    return {};
    431  }
    432 
    433  // For the demo, block any content that matches these wildcards.
    434  RegexArray toBlock_;
    435  RegexArray toWarn_;
    436  RegexArray toReport_;
    437 
    438  std::vector<unsigned long> delays_;
    439  std::atomic<size_t> nextDelayIndex_;
    440  std::string print_data_file_path_;
    441 };
    442 
    443 // An AgentEventHandler that dumps requests information to stdout and blocks
    444 // any requests that have the keyword "block" in their data
    445 class QueuingHandler : public Handler {
    446 public:
    447  QueuingHandler(unsigned long threads, std::vector<unsigned long>&& delays, const std::string& print_data_file_path,
    448    RegexArray&& toBlock = RegexArray(),
    449          RegexArray&& toWarn = RegexArray(),
    450          RegexArray&& toReport = RegexArray())
    451      : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport))  {
    452    StartBackgroundThreads(threads);
    453  }
    454 
    455  ~QueuingHandler() override {
    456    // Abort background process and wait for it to finish.
    457    request_queue_.abort();
    458    WaitForBackgroundThread();
    459  }
    460 
    461 private:
    462  void OnAnalysisRequested(std::unique_ptr<Event> event) override {
    463    {
    464      time_t now = time(nullptr);
    465      const content_analysis::sdk::ContentAnalysisRequest& request =
    466        event->GetRequest();
    467      AtomicCout aout;
    468      aout.stream() << std::endl << "Queuing request: " << request.request_token()
    469                    << " at " << ctime(&now) << std::endl;
    470    }
    471 
    472    request_queue_.push(std::move(event));
    473  }
    474 
    475  static void* ProcessRequests(void* qh) {
    476    QueuingHandler* handler = reinterpret_cast<QueuingHandler*>(qh);
    477 
    478    while (true) {
    479      auto event = handler->request_queue_.pop();
    480      if (!event)
    481        break;
    482 
    483      AtomicCout aout;
    484      aout.stream()  << std::endl << "----------" << std::endl;
    485      aout.stream() << "Thread: " << std::this_thread::get_id()
    486                    << std::endl;
    487      aout.stream() << "Delaying request processing for "
    488                    << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "ms" << std::endl << std::endl;
    489      aout.flush();
    490 
    491      handler->AnalyzeContent(aout, std::move(event));
    492    }
    493 
    494    return 0;
    495  }
    496 
    497  // A list of outstanding content analysis requests.
    498  RequestQueue request_queue_;
    499 
    500  void StartBackgroundThreads(unsigned long threads) {
    501    threads_.reserve(threads);
    502    for (unsigned long i = 0; i < threads; ++i) {
    503      threads_.emplace_back(std::make_unique<std::thread>(ProcessRequests, this));
    504    }
    505  }
    506 
    507  void WaitForBackgroundThread() {
    508    for (auto& thread : threads_) {
    509      thread->join();
    510    }
    511  }
    512 
    513  // Thread id of backgrond thread.
    514  std::vector<std::unique_ptr<std::thread>> threads_;
    515 };
    516 
    517 #endif  // CONTENT_ANALYSIS_DEMO_HANDLER_H_