language-model-prompt-multimodal.tentative.https.window.js (12028B)
// META: title=Language Model Prompt Multimodal
// META: script=/resources/testdriver.js
// META: script=../resources/util.js
// META: timeout=long

'use strict';

// Text part sent alongside every multimodal payload in this file.
const kPrompt = 'describe this';

// Test resources fetched or loaded by the tests below.
const kValidImagePath = '/images/computer.jpg';
const kValidAudioPath = '/media/speech.wav';
const kValidSVGImagePath = '/images/pattern.svg';
const kValidVideoPath = '/media/test.webm';

// Session options declaring a single expected input modality.
const kImageOptions = {expectedInputs: [{type: 'image'}]};
const kAudioOptions = {expectedInputs: [{type: 'audio'}]};

// NOTE(review): `ensureLanguageModel` and `createLanguageModel` are not
// defined in this file; presumably they come from ../resources/util.js, which
// is loaded through the META script line above.

/**
 * Builds a one-message prompt whose content is a text part followed by a
 * second part of the given type.
 *
 * @param {string} prompt - Value of the leading `text` content part.
 * @param {string} type - `type` of the second content part.
 * @param {*} value - `value` of the second content part, passed through as-is.
 * @returns {Array<Object>} An array holding a single `user` role message.
 */
function messageWithContent(prompt, type, value) {
  return [{
    role: 'user',
    content: [{type: 'text', value: prompt}, {type: type, value: value}]
  }];
}

/*****************************************
 * General tests
 *****************************************/

// An image object supplied as the value of a `text` part must be rejected.
promise_test(async (t) => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel(kImageOptions);
  // TODO(crbug.com/409615288): Expect a TypeError according to the spec.
  return promise_rejects_dom(
      t, 'SyntaxError',
      session.prompt(messageWithContent(kPrompt, 'text', newImage)));
}, 'Prompt with type:"text" and image content should reject');

// Image content attached to an `assistant` role message must be rejected.
promise_test(async (t) => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel(kImageOptions);
  return promise_rejects_dom(t, 'NotSupportedError', session.prompt([
    {role: 'assistant', content: [{type: 'image', value: newImage}]}
  ]));
}, 'Prompt with assistant role should reject with multimodal input');

/*****************************************
 * Image tests
 *****************************************/

// The session is created without options, so image content is not expected.
promise_test(async (t) => {
  await ensureLanguageModel();
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel();
  return promise_rejects_dom(
      t, 'NotSupportedError',
      session.prompt(messageWithContent(kPrompt, 'image', newImage)));
}, 'Prompt image without `image` expectedInput');

// Image content passed through `initialPrompts` is counted in `inputUsage`
// and shows up in the response to a follow-up prompt.
promise_test(async () => {
  const blob = await (await fetch(kValidImagePath)).blob();
  const options = {
    expectedInputs: [{type: 'image'}],
    initialPrompts: messageWithContent(kPrompt, 'image', blob)
  };
  await ensureLanguageModel(options);
  const session = await LanguageModel.create(options);
  const tokenLength = await session.measureInputUsage(options.initialPrompts);
  assert_greater_than(tokenLength, 0);
  assert_equals(session.inputUsage, tokenLength);
  assert_regexp_match(
      await session.prompt([{role: 'system', content: ''}]),
      /<image>/);
}, 'Test Image initialPrompt');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', blob));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with Blob image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const bitmap = await createImageBitmap(blob);
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', bitmap));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with ImageBitmap image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const bitmap = await createImageBitmap(blob);
  const frame = new VideoFrame(bitmap, {timestamp: 1});
  let result;
  try {
    const session = await createLanguageModel(kImageOptions);
    result =
        await session.prompt(messageWithContent(kPrompt, 'image', frame));
  } finally {
    // Close the frame on every path (including a rejected prompt) to avoid
    // the JS garbage collection warning for unclosed frames.
    frame.close();
  }
  assert_regexp_match(result, /<image>/);
}, 'Prompt with VideoFrame image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const canvas = new OffscreenCanvas(512, 512);
  // Requires a context to convert to a bitmap.
  const context = canvas.getContext('2d');
  context.fillRect(10, 10, 200, 200);
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', canvas));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with OffscreenCanvas image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const session = await createLanguageModel(kImageOptions);
  const result = await session.prompt(
      messageWithContent(kPrompt, 'image', new ImageData(256, 256)));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with ImageData image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidImagePath;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', newImage));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with HTMLImageElement image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const canvas = document.createElement('canvas');
  canvas.width = 1224;
  canvas.height = 768;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', canvas));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with HTMLCanvasElement image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const imageData = await fetch(kValidImagePath);
  const session = await createLanguageModel(kImageOptions);
  const result = await session.prompt(
      messageWithContent(kPrompt, 'image', await imageData.arrayBuffer()));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with ArrayBuffer image content');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const imageData = await fetch(kValidImagePath);
  const session = await createLanguageModel(kImageOptions);
  const result = await session.prompt(messageWithContent(
      kPrompt, 'image', new DataView(await imageData.arrayBuffer())));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with ArrayBufferView image content');

promise_test(async (t) => {
  await ensureLanguageModel(kImageOptions);
  const imageData = await fetch(kValidImagePath);
  const session = await createLanguageModel(kImageOptions);
  const buffer = await imageData.arrayBuffer();
  // Add 256 bytes of padding in front of the image data.
  const bufferView = new Uint8Array(buffer);
  const newBufferArray = new ArrayBuffer(256 + buffer.byteLength);
  const imageView = new Uint8Array(newBufferArray, 256, buffer.byteLength);
  imageView.set(bufferView);

  // A view that starts exactly at the image data is accepted.
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', imageView));
  assert_regexp_match(result, /<image>/);

  // Offset causes 56 bytes of blank data, resulting in a decoding error.
  await promise_rejects_dom(
      t, 'InvalidStateError',
      session.prompt(messageWithContent(
          kPrompt, 'image',
          new Uint8Array(newBufferArray, 200, buffer.byteLength))));
}, 'Prompt with ArrayBufferView image content with an offset.');

promise_test(async () => {
  await ensureLanguageModel(kImageOptions);
  const newImage = new Image();
  newImage.src = kValidSVGImagePath;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', newImage));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with HTMLImageElement image content (with SVG)');

promise_test(async (t) => {
  await ensureLanguageModel(kImageOptions);
  const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg');
  svg.setAttribute('width', '100');
  svg.setAttribute('height', '100');
  const svgImage =
      document.createElementNS('http://www.w3.org/2000/svg', 'image');
  svgImage.setAttribute('href', kValidImagePath);
  svgImage.setAttribute('decoding', 'sync');
  svg.appendChild(svgImage);
  document.body.appendChild(svg);
  // Do not leave the element in the document for the tests that follow.
  t.add_cleanup(() => svg.remove());

  // Must wait for the SVG and image to load first.
  // TODO(crbug.com/417260923): Make prompt Api await the image to be loaded.
  const {promise, resolve, reject} = Promise.withResolvers();
  svgImage.addEventListener('load', resolve, {once: true});
  // Fail right away on a load error instead of waiting for the timeout.
  svgImage.addEventListener(
      'error', () => reject(new Error('SVG image failed to load')),
      {once: true});
  await promise;
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', svgImage));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with SVGImageElement image content');

promise_test(async (t) => {
  await ensureLanguageModel(kImageOptions);
  const video = document.createElement('video');
  video.src = kValidVideoPath;
  video.width = 1224;
  video.height = 768;
  // Stop playback once this test is over.
  t.add_cleanup(() => video.pause());
  // Video must have frames fetched. See crbug.com/417249941#comment3
  await video.play();
  const session = await createLanguageModel(kImageOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'image', video));
  assert_regexp_match(result, /<image>/);
}, 'Prompt with HTMLVideoElement image content');

/*****************************************
 * Audio tests
 *****************************************/

// The session is created without options, so audio content is not expected.
promise_test(async (t) => {
  await ensureLanguageModel();
  const blob = await (await fetch(kValidAudioPath)).blob();
  const session = await createLanguageModel();
  return promise_rejects_dom(
      t, 'NotSupportedError',
      session.prompt(messageWithContent(kPrompt, 'audio', blob)));
}, 'Prompt audio without `audio` expectedInput');

// Audio content passed through `initialPrompts` is counted in `inputUsage`
// and shows up in the response to a follow-up prompt.
promise_test(async () => {
  const blob = await (await fetch(kValidAudioPath)).blob();
  const options = {
    expectedInputs: [{type: 'audio'}],
    initialPrompts: messageWithContent(kPrompt, 'audio', blob)
  };
  await ensureLanguageModel(options);
  const session = await LanguageModel.create(options);
  const tokenLength = await session.measureInputUsage(options.initialPrompts);
  assert_greater_than(tokenLength, 0);
  assert_equals(session.inputUsage, tokenLength);
  assert_regexp_match(
      await session.prompt([{role: 'system', content: ''}]),
      /<audio>/);
}, 'Test Audio initialPrompt');

promise_test(async () => {
  await ensureLanguageModel(kAudioOptions);
  const blob = await (await fetch(kValidAudioPath)).blob();
  const session = await createLanguageModel(kAudioOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'audio', blob));
  assert_regexp_match(result, /<audio>/);
}, 'Prompt with Blob audio content');

// The blob below holds image bytes, which are not valid audio data.
promise_test(async (t) => {
  await ensureLanguageModel(kAudioOptions);
  const blob = await (await fetch(kValidImagePath)).blob();
  const session = await createLanguageModel(kAudioOptions);
  // TODO(crbug.com/409615288): Expect a TypeError according to the spec.
  return promise_rejects_dom(
      t, 'DataError',
      session.prompt(messageWithContent(kPrompt, 'audio', blob)));
}, 'Prompt audio with blob containing invalid audio data.');

promise_test(async (t) => {
  await ensureLanguageModel(kAudioOptions);
  const audioData = await fetch(kValidAudioPath);
  const audioCtx = new AudioContext();
  // Close the context when the test finishes so it does not outlive it.
  t.add_cleanup(() => audioCtx.close());
  const buffer = await audioCtx.decodeAudioData(await audioData.arrayBuffer());
  const session = await createLanguageModel(kAudioOptions);
  const result =
      await session.prompt(messageWithContent(kPrompt, 'audio', buffer));
  assert_regexp_match(result, /<audio>/);
}, 'Prompt with AudioBuffer');

promise_test(async () => {
  await ensureLanguageModel(kAudioOptions);
  const audioData = await fetch(kValidAudioPath);
  const session = await createLanguageModel(kAudioOptions);
  const result = await session.prompt(
      messageWithContent(kPrompt, 'audio', await audioData.arrayBuffer()));
  assert_regexp_match(result, /<audio>/);
}, 'Prompt with BufferSource - ArrayBuffer');