handler.h (19713B)
1 // Copyright 2022 The Chromium Authors. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CONTENT_ANALYSIS_DEMO_HANDLER_H_ 6 #define CONTENT_ANALYSIS_DEMO_HANDLER_H_ 7 8 #include <time.h> 9 10 #include <algorithm> 11 #include <atomic> 12 #include <chrono> 13 #include <cstdio> 14 #include <fstream> 15 #include <iostream> 16 #include <optional> 17 #include <thread> 18 #include <utility> 19 #include <regex> 20 #include <vector> 21 22 #include "content_analysis/sdk/analysis_agent.h" 23 #include "demo/atomic_output.h" 24 #include "demo/request_queue.h" 25 26 using RegexArray = std::vector<std::pair<std::string, std::regex>>; 27 28 // An AgentEventHandler that dumps requests information to stdout and blocks 29 // any requests that have the keyword "block" in their data 30 class Handler : public content_analysis::sdk::AgentEventHandler { 31 public: 32 using Event = content_analysis::sdk::ContentAnalysisEvent; 33 34 Handler(std::vector<unsigned long>&& delays, const std::string& print_data_file_path, 35 RegexArray&& toBlock = RegexArray(), 36 RegexArray&& toWarn = RegexArray(), 37 RegexArray&& toReport = RegexArray()) : 38 toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)), 39 delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {} 40 41 const std::vector<unsigned long> delays() { return delays_; } 42 size_t nextDelayIndex() const { return nextDelayIndex_; } 43 44 protected: 45 // subclasses can override this 46 // returns whether the response has been set 47 virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) { 48 return false; 49 } 50 // subclasses can override this 51 // returns whether the response has been sent 52 virtual bool SendCustomResponse(std::unique_ptr<Event>& event) { 53 return false; 54 } 55 // Analyzes one request from Google Chrome and responds back to the browser 56 // with either an allow or block verdict. 57 void AnalyzeContent(AtomicCout& aout, std::unique_ptr<Event> event) { 58 // An event represents one content analysis request and response triggered 59 // by a user action in Google Chrome. The agent determines whether the 60 // user is allowed to perform the action by examining event->GetRequest(). 61 // The verdict, which can be "allow" or "block" is written into 62 // event->GetResponse(). 63 64 DumpEvent(aout.stream(), event.get()); 65 66 bool success = true; 67 std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse; 68 bool setResponse = SetCustomResponse(aout, event); 69 if (!setResponse) { 70 caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; 71 if (event->GetRequest().has_text_content()) { 72 caResponse = DecideCAResponse( 73 event->GetRequest().text_content(), aout.stream()); 74 } else if (event->GetRequest().has_file_path()) { 75 // TODO: Fix downloads to store file *first* so we can check contents. 76 // Until then, just check the file name: 77 caResponse = DecideCAResponse( 78 event->GetRequest().file_path(), aout.stream()); 79 } else if (event->GetRequest().has_print_data()) { 80 // In the case of print request, normally the PDF bytes would be parsed 81 // for sensitive data violations. To keep this class simple, only the 82 // URL is checked for the word "block". 83 caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream()); 84 } 85 } 86 87 if (!success) { 88 content_analysis::sdk::UpdateResponse( 89 event->GetResponse(), 90 std::string(), 91 content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE); 92 aout.stream() << " Verdict: failed to reach verdict: "; 93 aout.stream() << event->DebugString() << std::endl; 94 } else { 95 aout.stream() << " Verdict: "; 96 if (caResponse) { 97 switch (caResponse.value()) { 98 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK: 99 aout.stream() << "BLOCK"; 100 break; 101 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN: 102 aout.stream() << "WARN"; 103 break; 104 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY: 105 aout.stream() << "REPORT_ONLY"; 106 break; 107 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED: 108 aout.stream() << "ACTION_UNSPECIFIED"; 109 break; 110 default: 111 aout.stream() << "<error>"; 112 break; 113 } 114 auto rc = 115 content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value()); 116 if (rc != content_analysis::sdk::ResultCode::OK) { 117 aout.stream() << " error: " 118 << content_analysis::sdk::ResultCodeToString(rc) << std::endl; 119 aout.stream() << " " << event->DebugString() << std::endl; 120 } 121 aout.stream() << std::endl; 122 } else { 123 aout.stream() << " Verdict: allow" << std::endl; 124 } 125 aout.stream() << std::endl; 126 } 127 aout.stream() << std::endl; 128 129 // If a delay is specified, wait that much. 130 size_t nextDelayIndex = nextDelayIndex_.fetch_add(1); 131 unsigned long delay = delays_[nextDelayIndex % delays_.size()]; 132 if (delay > 0) { 133 aout.stream() << "Delaying response to " << event->GetRequest().request_token() 134 << " for " << delay << "ms" << std::endl<< std::endl; 135 aout.flush(); 136 std::this_thread::sleep_for(std::chrono::milliseconds(delay)); 137 } 138 139 // Send the response back to Google Chrome. 140 bool sentCustomResponse = SendCustomResponse(event); 141 if (!sentCustomResponse) { 142 auto rc = event->Send(); 143 if (rc != content_analysis::sdk::ResultCode::OK) { 144 aout.stream() << "[Demo] Error sending response: " 145 << content_analysis::sdk::ResultCodeToString(rc) 146 << std::endl; 147 aout.stream() << event->DebugString() << std::endl; 148 } 149 } 150 } 151 152 protected: 153 void OnBrowserConnected( 154 const content_analysis::sdk::BrowserInfo& info) override { 155 AtomicCout aout; 156 aout.stream() << std::endl << "==========" << std::endl; 157 aout.stream() << "Browser connected pid=" << info.pid 158 << " path=" << info.binary_path << std::endl; 159 } 160 161 void OnBrowserDisconnected( 162 const content_analysis::sdk::BrowserInfo& info) override { 163 AtomicCout aout; 164 aout.stream() << std::endl << "Browser disconnected pid=" << info.pid << std::endl; 165 aout.stream() << "==========" << std::endl; 166 } 167 168 void OnAnalysisRequested(std::unique_ptr<Event> event) override { 169 // If the agent is capable of analyzing content in the background, the 170 // events may be handled in background threads. Having said that, a 171 // event should not be assumed to be thread safe, that is, it should not 172 // be accessed by more than one thread concurrently. 173 // 174 // In this example code, the event is handled synchronously. 175 AtomicCout aout; 176 aout.stream() << std::endl << "----------" << std::endl << std::endl; 177 AnalyzeContent(aout, std::move(event)); 178 } 179 180 void OnResponseAcknowledged( 181 const content_analysis::sdk::ContentAnalysisAcknowledgement& 182 ack) override { 183 const char* final_action = "<Unknown>"; 184 if (ack.has_final_action()) { 185 switch (ack.final_action()) { 186 case content_analysis::sdk::ContentAnalysisAcknowledgement::ACTION_UNSPECIFIED: 187 final_action = "<Unspecified>"; 188 break; 189 case content_analysis::sdk::ContentAnalysisAcknowledgement::ALLOW: 190 final_action = "Allow"; 191 break; 192 case content_analysis::sdk::ContentAnalysisAcknowledgement::REPORT_ONLY: 193 final_action = "Report only"; 194 break; 195 case content_analysis::sdk::ContentAnalysisAcknowledgement::WARN: 196 final_action = "Warn"; 197 break; 198 case content_analysis::sdk::ContentAnalysisAcknowledgement::BLOCK: 199 final_action = "Block"; 200 break; 201 } 202 } 203 204 AtomicCout aout; 205 aout.stream() << " Ack: " << ack.request_token() << std::endl; 206 aout.stream() << " Final action: " << final_action << std::endl; 207 } 208 void OnCancelRequests( 209 const content_analysis::sdk::ContentAnalysisCancelRequests& cancel) 210 override { 211 AtomicCout aout; 212 aout.stream() << "Cancel: " << std::endl; 213 aout.stream() << " User action ID: " << cancel.user_action_id() << std::endl; 214 } 215 216 void OnInternalError( 217 const char* context, 218 content_analysis::sdk::ResultCode error) override { 219 AtomicCout aout; 220 aout.stream() << std::endl 221 << "*ERROR*: context=\"" << context << "\" " 222 << content_analysis::sdk::ResultCodeToString(error) 223 << std::endl; 224 } 225 226 void DumpEvent(std::stringstream& stream, Event* event) { 227 time_t now = time(nullptr); 228 stream << "Received at: " << ctime(&now); // Includes \n. 229 stream << "Received from: pid=" << event->GetBrowserInfo().pid 230 << " path=" << event->GetBrowserInfo().binary_path << std::endl; 231 232 const content_analysis::sdk::ContentAnalysisRequest& request = 233 event->GetRequest(); 234 std::string connector = "<Unknown>"; 235 if (request.has_analysis_connector()) { 236 switch (request.analysis_connector()) { 237 case content_analysis::sdk::FILE_DOWNLOADED: 238 connector = "download"; 239 break; 240 case content_analysis::sdk::FILE_ATTACHED: 241 connector = "attach"; 242 break; 243 case content_analysis::sdk::BULK_DATA_ENTRY: 244 connector = "bulk-data-entry"; 245 break; 246 case content_analysis::sdk::PRINT: 247 connector = "print"; 248 break; 249 case content_analysis::sdk::FILE_TRANSFER: 250 connector = "file-transfer"; 251 break; 252 default: 253 break; 254 } 255 } 256 std::string reason; 257 if (request.has_reason()) { 258 using content_analysis::sdk::ContentAnalysisRequest; 259 switch (request.reason()) { 260 case content_analysis::sdk::ContentAnalysisRequest::UNKNOWN: 261 reason = "<Unknown>"; 262 break; 263 case content_analysis::sdk::ContentAnalysisRequest::CLIPBOARD_PASTE: 264 reason = "CLIPBOARD_PASTE"; 265 break; 266 case content_analysis::sdk::ContentAnalysisRequest::DRAG_AND_DROP: 267 reason = "DRAG_AND_DROP"; 268 break; 269 case content_analysis::sdk::ContentAnalysisRequest::FILE_PICKER_DIALOG: 270 reason = "FILE_PICKER_DIALOG"; 271 break; 272 case content_analysis::sdk::ContentAnalysisRequest::PRINT_PREVIEW_PRINT: 273 reason = "PRINT_PREVIEW_PRINT"; 274 break; 275 case content_analysis::sdk::ContentAnalysisRequest::SYSTEM_DIALOG_PRINT: 276 reason = "SYSTEM_DIALOG_PRINT"; 277 break; 278 case content_analysis::sdk::ContentAnalysisRequest::NORMAL_DOWNLOAD: 279 reason = "NORMAL_DOWNLOAD"; 280 break; 281 case content_analysis::sdk::ContentAnalysisRequest::SAVE_AS_DOWNLOAD: 282 reason = "SAVE_AS_DOWNLOAD"; 283 break; 284 } 285 } 286 287 std::string url = 288 request.has_request_data() && request.request_data().has_url() 289 ? request.request_data().url() : "<No URL>"; 290 291 std::string tab_title = 292 request.has_request_data() && request.request_data().has_tab_title() 293 ? request.request_data().tab_title() : "<No tab title>"; 294 295 std::string filename = 296 request.has_request_data() && request.request_data().has_filename() 297 ? request.request_data().filename() : "<No filename>"; 298 299 std::string digest = 300 request.has_request_data() && request.request_data().has_digest() 301 ? request.request_data().digest() : "<No digest>"; 302 303 std::string file_path = 304 request.has_file_path() 305 ? request.file_path() : "None, bulk text entry or print"; 306 307 std::string machine_user = 308 request.has_client_metadata() && 309 request.client_metadata().has_browser() && 310 request.client_metadata().browser().has_machine_user() 311 ? request.client_metadata().browser().machine_user() : "<No machine user>"; 312 313 std::string email = 314 request.has_request_data() && request.request_data().has_email() 315 ? request.request_data().email() : "<No email>"; 316 317 time_t t = request.expires_at(); 318 std::string expires_at_str = ctime(&t); 319 // Returned string includes trailing \n, overwrite with null. 320 expires_at_str[expires_at_str.size() - 1] = 0; 321 time_t secs_remaining = t - now; 322 323 std::string user_action_id = request.has_user_action_id() 324 ? request.user_action_id() : "<No user action id>"; 325 326 stream << "Request: " << request.request_token() << std::endl; 327 stream << " User action ID: " << user_action_id << std::endl; 328 stream << " Expires at: " << expires_at_str << " (" 329 << secs_remaining << " seconds from now)" << std::endl; 330 stream << " Connector: " << connector << std::endl; 331 if (!reason.empty()) { 332 stream << " Reason: " << reason << std::endl; 333 } 334 stream << " URL: " << url << std::endl; 335 stream << " Tab title: " << tab_title << std::endl; 336 stream << " Filename: " << filename << std::endl; 337 stream << " Digest: " << digest << std::endl; 338 stream << " Filepath: " << file_path << std::endl; 339 stream << " Machine user: " << machine_user << std::endl; 340 stream << " Email: " << email << std::endl; 341 342 if (request.has_text_content() && !request.text_content().empty()) { 343 std::string prefix = " Pasted data: "; 344 std::string text_content = request.text_content(); 345 346 // Truncate the text past 50 bytes to keep it to a reasonable length in 347 // the terminal window. 348 if (text_content.size() > 50) { 349 prefix = " Pasted data (truncated): "; 350 text_content = text_content.substr(0, 50) + "..."; 351 } 352 stream << prefix 353 << text_content 354 << std::endl; 355 stream << " Pasted data size (bytes): " 356 << request.text_content().size() 357 << std::endl; 358 } 359 360 if (request.has_print_data() && !print_data_file_path_.empty()) { 361 if (request.request_data().has_print_metadata() && 362 request.request_data().print_metadata().has_printer_name()) { 363 stream << " Printer name: " 364 << request.request_data().print_metadata().printer_name() 365 << std::endl; 366 } else { 367 stream << " No printer name in request" << std::endl; 368 } 369 370 stream << " Print data saved to: " << print_data_file_path_ 371 << std::endl; 372 using content_analysis::sdk::ContentAnalysisEvent; 373 auto print_data = 374 content_analysis::sdk::CreateScopedPrintHandle(event->GetRequest(), 375 event->GetBrowserInfo().pid); 376 std::ofstream file(print_data_file_path_, 377 std::ios::out | std::ios::trunc | std::ios::binary); 378 file.write(print_data->data(), print_data->size()); 379 file.flush(); 380 file.close(); 381 } 382 } 383 384 bool ReadContentFromFile(const std::string& file_path, 385 std::string* content) { 386 std::ifstream file(file_path, 387 std::ios::in | std::ios::binary | std::ios::ate); 388 if (!file.is_open()) 389 return false; 390 391 // Get file size. This example does not handle files larger than 1MB. 392 // Make sure content string can hold the contents of the file. 393 int size = file.tellg(); 394 if (size > 1024 * 1024) 395 return false; 396 397 content->resize(size + 1); 398 399 // Read file into string. 400 file.seekg(0, std::ios::beg); 401 file.read(&(*content)[0], size); 402 content->at(size) = 0; 403 return true; 404 } 405 406 std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> 407 DecideCAResponse(const std::string& content, std::stringstream& stream) { 408 for (auto& r : toBlock_) { 409 if (std::regex_search(content, r.second)) { 410 stream << "'" << content << "' matches BLOCK regex '" 411 << r.first << "'" << std::endl; 412 return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; 413 } 414 } 415 for (auto& r : toWarn_) { 416 if (std::regex_search(content, r.second)) { 417 stream << "'" << content << "' matches WARN regex '" 418 << r.first << "'" << std::endl; 419 return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN; 420 } 421 } 422 for (auto& r : toReport_) { 423 if (std::regex_search(content, r.second)) { 424 stream << "'" << content << "' matches REPORT_ONLY regex '" 425 << r.first << "'" << std::endl; 426 return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY; 427 } 428 } 429 stream << "'" << content << "' was ALLOWed\n"; 430 return {}; 431 } 432 433 // For the demo, block any content that matches these wildcards. 434 RegexArray toBlock_; 435 RegexArray toWarn_; 436 RegexArray toReport_; 437 438 std::vector<unsigned long> delays_; 439 std::atomic<size_t> nextDelayIndex_; 440 std::string print_data_file_path_; 441 }; 442 443 // An AgentEventHandler that dumps requests information to stdout and blocks 444 // any requests that have the keyword "block" in their data 445 class QueuingHandler : public Handler { 446 public: 447 QueuingHandler(unsigned long threads, std::vector<unsigned long>&& delays, const std::string& print_data_file_path, 448 RegexArray&& toBlock = RegexArray(), 449 RegexArray&& toWarn = RegexArray(), 450 RegexArray&& toReport = RegexArray()) 451 : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) { 452 StartBackgroundThreads(threads); 453 } 454 455 ~QueuingHandler() override { 456 // Abort background process and wait for it to finish. 457 request_queue_.abort(); 458 WaitForBackgroundThread(); 459 } 460 461 private: 462 void OnAnalysisRequested(std::unique_ptr<Event> event) override { 463 { 464 time_t now = time(nullptr); 465 const content_analysis::sdk::ContentAnalysisRequest& request = 466 event->GetRequest(); 467 AtomicCout aout; 468 aout.stream() << std::endl << "Queuing request: " << request.request_token() 469 << " at " << ctime(&now) << std::endl; 470 } 471 472 request_queue_.push(std::move(event)); 473 } 474 475 static void* ProcessRequests(void* qh) { 476 QueuingHandler* handler = reinterpret_cast<QueuingHandler*>(qh); 477 478 while (true) { 479 auto event = handler->request_queue_.pop(); 480 if (!event) 481 break; 482 483 AtomicCout aout; 484 aout.stream() << std::endl << "----------" << std::endl; 485 aout.stream() << "Thread: " << std::this_thread::get_id() 486 << std::endl; 487 aout.stream() << "Delaying request processing for " 488 << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "ms" << std::endl << std::endl; 489 aout.flush(); 490 491 handler->AnalyzeContent(aout, std::move(event)); 492 } 493 494 return 0; 495 } 496 497 // A list of outstanding content analysis requests. 498 RequestQueue request_queue_; 499 500 void StartBackgroundThreads(unsigned long threads) { 501 threads_.reserve(threads); 502 for (unsigned long i = 0; i < threads; ++i) { 503 threads_.emplace_back(std::make_unique<std::thread>(ProcessRequests, this)); 504 } 505 } 506 507 void WaitForBackgroundThread() { 508 for (auto& thread : threads_) { 509 thread->join(); 510 } 511 } 512 513 // Thread id of backgrond thread. 514 std::vector<std::unique_ptr<std::thread>> threads_; 515 }; 516 517 #endif // CONTENT_ANALYSIS_DEMO_HANDLER_H_