tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

language-model-prompt-multimodal.tentative.https.window.js (12028B)


      1 // META: title=Language Model Prompt Multimodal
      2 // META: script=/resources/testdriver.js
      3 // META: script=../resources/util.js
      4 // META: timeout=long
      5 
      6 'use strict';
      7 
      8 const kPrompt = 'describe this';
      9 const kValidImagePath = '/images/computer.jpg';
     10 const kValidAudioPath = '/media/speech.wav';
     11 const kValidSVGImagePath = '/images/pattern.svg';
     12 const kValidVideoPath = '/media/test.webm';
     13 
     14 const kImageOptions = {expectedInputs: [{type: 'image'}]};
     15 const kAudioOptions = {expectedInputs: [{type: 'audio'}]};
     16 
     17 function messageWithContent(prompt, type, value) {
     18  return [{
     19    role: 'user',
     20    content: [{type: 'text', value: prompt}, {type: type, value: value}]
     21  }];
     22 }
     23 
     24 /*****************************************
     25 * General tests
     26 *****************************************/
     27 
     28 promise_test(async t => {
     29  await ensureLanguageModel(kImageOptions);
     30  const newImage = new Image();
     31  newImage.src = kValidImagePath;
     32  const session = await createLanguageModel(kImageOptions);
     33  // TODO(crbug.com/409615288): Expect a TypeError according to the spec.
     34  return promise_rejects_dom(
     35      t, 'SyntaxError',
     36      session.prompt(messageWithContent(kPrompt, 'text', newImage)));
     37 }, 'Prompt with type:"text" and image content should reject');
     38 
     39 promise_test(async t => {
     40  await ensureLanguageModel(kImageOptions);
     41  const newImage = new Image();
     42  newImage.src = kValidImagePath;
     43  const session = await createLanguageModel(kImageOptions);
     44  return promise_rejects_dom(t, 'NotSupportedError', session.prompt([
     45    {role: 'assistant', content: [{type: 'image', value: newImage}]}
     46  ]));
     47 }, 'Prompt with assistant role should reject with multimodal input');
     48 
     49 /*****************************************
     50 * Image tests
     51 *****************************************/
     52 
     53 promise_test(async (t) => {
     54  await ensureLanguageModel();
     55  const newImage = new Image();
     56  newImage.src = kValidImagePath;
     57  const session = await createLanguageModel();
     58  return promise_rejects_dom(
     59      t, 'NotSupportedError',
     60      session.prompt(messageWithContent(kPrompt, 'image', newImage)));
     61 }, 'Prompt image without `image` expectedInput');
     62 
     63 promise_test(async () => {
     64  const blob = await (await fetch(kValidImagePath)).blob();
     65  const options = {
     66    expectedInputs: [{type: 'image'}],
     67    initialPrompts: messageWithContent(kPrompt, 'image', blob)
     68  };
     69  await ensureLanguageModel(options);
     70  const session = await LanguageModel.create(options);
     71  const tokenLength = await session.measureInputUsage(options.initialPrompts);
     72  assert_greater_than(tokenLength, 0);
     73  assert_equals(session.inputUsage, tokenLength);
     74  assert_regexp_match(
     75      await session.prompt([{role: 'system', content: ''}]),
     76      /<image>/);
     77 }, 'Test Image initialPrompt');
     78 
     79 promise_test(async () => {
     80  await ensureLanguageModel(kImageOptions);
     81  const blob = await (await fetch(kValidImagePath)).blob();
     82  const session = await createLanguageModel(kImageOptions);
     83  const result =
     84      await session.prompt(messageWithContent(kPrompt, 'image', blob));
     85  assert_regexp_match(result, /<image>/);
     86 }, 'Prompt with Blob image content');
     87 
     88 promise_test(async () => {
     89  await ensureLanguageModel(kImageOptions);
     90  const blob = await (await fetch(kValidImagePath)).blob();
     91  const bitmap = await createImageBitmap(blob);
     92  const session = await createLanguageModel(kImageOptions);
     93  const result =
     94      await session.prompt(messageWithContent(kPrompt, 'image', bitmap));
     95  assert_regexp_match(result, /<image>/);
     96 }, 'Prompt with ImageBitmap image content');
     97 
     98 promise_test(async () => {
     99  await ensureLanguageModel(kImageOptions);
    100  const blob = await (await fetch(kValidImagePath)).blob();
    101  const bitmap = await createImageBitmap(blob);
    102  const frame = new VideoFrame(bitmap, {timestamp: 1});
    103  const session = await createLanguageModel(kImageOptions);
    104  const result =
    105      await session.prompt(messageWithContent(kPrompt, 'image', frame));
    106  frame.close();  // Avoid JS garbage collection warning.
    107  assert_regexp_match(result, /<image>/);
    108 }, 'Prompt with VideoFrame image content');
    109 
    110 promise_test(async () => {
    111  await ensureLanguageModel(kImageOptions);
    112  const canvas = new OffscreenCanvas(512, 512);
    113  // Requires a context to convert to a bitmap.
    114  var context = canvas.getContext('2d');
    115  context.fillRect(10, 10, 200, 200);
    116  const session = await createLanguageModel(kImageOptions);
    117  const result =
    118      await session.prompt(messageWithContent(kPrompt, 'image', canvas));
    119  assert_regexp_match(result, /<image>/);
    120 }, 'Prompt with OffscreenCanvas image content');
    121 
    122 promise_test(async () => {
    123  await ensureLanguageModel(kImageOptions);
    124  const session = await createLanguageModel(kImageOptions);
    125  const result = await session.prompt(
    126      messageWithContent(kPrompt, 'image', new ImageData(256, 256)));
    127  assert_regexp_match(result, /<image>/);
    128 }, 'Prompt with ImageData image content');
    129 
    130 promise_test(async () => {
    131  await ensureLanguageModel(kImageOptions);
    132  const newImage = new Image();
    133  newImage.src = kValidImagePath;
    134  const session = await createLanguageModel(kImageOptions);
    135  const result =
    136      await session.prompt(messageWithContent(kPrompt, 'image', newImage));
    137  assert_regexp_match(result, /<image>/);
    138 }, 'Prompt with HTMLImageElement image content');
    139 
    140 promise_test(async () => {
    141  await ensureLanguageModel(kImageOptions);
    142  var canvas = document.createElement('canvas');
    143  canvas.width = 1224;
    144  canvas.height = 768;
    145  const session = await createLanguageModel(kImageOptions);
    146  const result =
    147      await session.prompt(messageWithContent(kPrompt, 'image', canvas));
    148  assert_regexp_match(result, /<image>/);
    149 }, 'Prompt with HTMLCanvasElement image content');
    150 
    151 promise_test(async () => {
    152  await ensureLanguageModel(kImageOptions);
    153  const imageData = await fetch(kValidImagePath);
    154  const session = await createLanguageModel(kImageOptions);
    155  const result = await session.prompt(
    156      messageWithContent(kPrompt, 'image', await imageData.arrayBuffer()));
    157  assert_regexp_match(result, /<image>/);
    158 }, 'Prompt with ArrayBuffer image content');
    159 
    160 promise_test(async () => {
    161  await ensureLanguageModel(kImageOptions);
    162  const imageData = await fetch(kValidImagePath);
    163  const session = await createLanguageModel(kImageOptions);
    164  const result = await session.prompt(messageWithContent(
    165      kPrompt, 'image', new DataView(await imageData.arrayBuffer())));
    166  assert_regexp_match(result, /<image>/);
    167 }, 'Prompt with ArrayBufferView image content');
    168 
    169 promise_test(async (t) => {
    170  await ensureLanguageModel(kImageOptions);
    171  const imageData = await fetch(kValidImagePath);
    172  const session = await createLanguageModel(kImageOptions);
    173  const buffer = await imageData.arrayBuffer();
    174  // Add 256 bytes of padding in front of the image data.
    175  const bufferView = new Uint8Array(buffer);
    176  const newBufferArray = new ArrayBuffer(256 + buffer.byteLength);
    177  const imageView = new Uint8Array(newBufferArray, 256, buffer.byteLength);
    178  imageView.set(bufferView);
    179 
    180  const result =
    181      await session.prompt(messageWithContent(kPrompt, 'image', imageView));
    182  assert_regexp_match(result, /<image>/);
    183 
    184  // Offset causes 56 bytes of blank data, resulting in a decoding error.
    185  await promise_rejects_dom(
    186      t, 'InvalidStateError',
    187      session.prompt(messageWithContent(
    188          kPrompt, 'image',
    189          new Uint8Array(newBufferArray, 200, buffer.byteLength))));
    190 }, 'Prompt with ArrayBufferView image content with an offset.');
    191 
    192 
    193 promise_test(async () => {
    194  await ensureLanguageModel(kImageOptions);
    195  const newImage = new Image();
    196  newImage.src = kValidSVGImagePath;
    197  const session = await createLanguageModel(kImageOptions);
    198  const result =
    199      await session.prompt(messageWithContent(
    200        kPrompt, 'image', newImage));
    201  assert_regexp_match(result, /<image>/);
    202 }, 'Prompt with HTMLImageElement image content (with SVG)');
    203 
    204 
    205 promise_test(async () => {
    206  await ensureLanguageModel(kImageOptions);
    207  const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg');
    208  svg.setAttribute('width', '100');
    209  svg.setAttribute('height', '100');
    210  const svgImage =
    211      document.createElementNS('http://www.w3.org/2000/svg', 'image');
    212  svgImage.setAttribute('href', kValidImagePath);
    213  svgImage.setAttribute('decoding', 'sync');
    214  svg.appendChild(svgImage);
    215  document.body.appendChild(svg);
    216 
    217  // Must wait for the SVG and image to load first.
    218  // TODO(crbug.com/417260923): Make prompt Api await the image to be loaded.
    219  const {promise, resolve} = Promise.withResolvers();
    220  svgImage.addEventListener('load', resolve);
    221  await promise;
    222  const session = await createLanguageModel(kImageOptions);
    223  const result =
    224      await session.prompt(messageWithContent(
    225        kPrompt, 'image', svgImage));
    226  assert_regexp_match(result, /<image>/);
    227 }, 'Prompt with SVGImageElement image content');
    228 
    229 promise_test(async () => {
    230  await ensureLanguageModel(kImageOptions);
    231  var video = document.createElement('video');
    232  video.src = kValidVideoPath;
    233  video.width = 1224;
    234  video.height = 768;
    235  // Video must have frames fetched. See crbug.com/417249941#comment3
    236  await video.play();
    237  const session = await createLanguageModel(kImageOptions);
    238  const result =
    239      await session.prompt(messageWithContent(
    240        kPrompt, 'image', video));
    241  assert_regexp_match(result, /<image>/);
    242 }, 'Prompt with HTMLVideoElement image content');
    243 
    244 /*****************************************
    245 * Audio tests
    246 *****************************************/
    247 
    248 promise_test(async (t) => {
    249  await ensureLanguageModel();
    250  const blob = await (await fetch(kValidAudioPath)).blob();
    251  const session = await createLanguageModel();
    252  return promise_rejects_dom(
    253      t, 'NotSupportedError',
    254      session.prompt(messageWithContent(kPrompt, 'audio', blob)));
    255 }, 'Prompt audio without `audio` expectedInput');
    256 
    257 promise_test(async () => {
    258  const blob = await (await fetch(kValidAudioPath)).blob();
    259  const options = {
    260    expectedInputs: [{type: 'audio'}],
    261    initialPrompts: messageWithContent(kPrompt, 'audio', blob)
    262  };
    263  await ensureLanguageModel(options);
    264  const session = await LanguageModel.create(options);
    265  const tokenLength = await session.measureInputUsage(options.initialPrompts);
    266  assert_greater_than(tokenLength, 0);
    267  assert_equals(session.inputUsage, tokenLength);
    268  assert_regexp_match(
    269      await session.prompt([{role: 'system', content: ''}]),
    270      /<audio>/);
    271 }, 'Test Audio initialPrompt');
    272 
    273 promise_test(async () => {
    274  await ensureLanguageModel(kAudioOptions);
    275  const blob = await (await fetch(kValidAudioPath)).blob();
    276  const session = await createLanguageModel(kAudioOptions);
    277  const result =
    278      await session.prompt(messageWithContent(kPrompt, 'audio', blob));
    279  assert_regexp_match(result, /<audio>/);
    280 }, 'Prompt with Blob audio content');
    281 
    282 promise_test(async (t) => {
    283  await ensureLanguageModel(kAudioOptions);
    284  const blob = await (await fetch(kValidImagePath)).blob();
    285  const session = await createLanguageModel(kAudioOptions);
    286  // TODO(crbug.com/409615288): Expect a TypeError according to the spec.
    287  return promise_rejects_dom(
    288      t, 'DataError',
    289      session.prompt(messageWithContent(kPrompt, 'audio', blob)));
    290 }, 'Prompt audio with blob containing invalid audio data.');
    291 
    292 promise_test(async () => {
    293  await ensureLanguageModel(kAudioOptions);
    294  const audio_data = await fetch(kValidAudioPath);
    295  const audioCtx = new AudioContext();
    296  const buffer = await audioCtx.decodeAudioData(await audio_data.arrayBuffer());
    297  const session = await createLanguageModel(kAudioOptions);
    298  const result =
    299      await session.prompt(messageWithContent(kPrompt, 'audio', buffer));
    300  assert_regexp_match(result, /<audio>/);
    301 }, 'Prompt with AudioBuffer');
    302 
    303 promise_test(async () => {
    304  await ensureLanguageModel(kAudioOptions);
    305  const audio_data = await fetch(kValidAudioPath);
    306  const session = await createLanguageModel(kAudioOptions);
    307  const result = await session.prompt(
    308      messageWithContent(kPrompt, 'audio', await audio_data.arrayBuffer()));
    309  assert_regexp_match(result, /<audio>/);
    310 }, 'Prompt with BufferSource - ArrayBuffer');