device_allocation.spec.ts (11912B)
export const description = `
Stress tests for GPUAdapter.requestDevice.
`;

import { Fixture } from '../../common/framework/fixture.js';
import { makeTestGroup } from '../../common/framework/test_group.js';
import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
import { keysOf } from '../../common/util/data_tables.js';
import { getGPU } from '../../common/util/navigator_gpu.js';
import { assert, iterRange } from '../../common/util/util.js';
import { getDefaultLimitsForCTS } from '../../webgpu/capability_info.js';

export const g = makeTestGroup(Fixture);

/**
 * Adapter preference identifier to option.
 * Note: 'fallback' intentionally leaves powerPreference undefined and instead sets
 * forceFallbackAdapter, so the three entries exercise distinct requestAdapter paths.
 */
const kAdapterTypeOptions: {
  readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions;
} =
/* prettier-ignore */ {
  'low-power': { powerPreference: 'low-power', forceFallbackAdapter: false },
  'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false },
  'fallback': { powerPreference: undefined, forceFallbackAdapter: true },
};
/** List of all adapter hint types (the keys of kAdapterTypeOptions). */
const kAdapterTypes = keysOf(kAdapterTypeOptions);

/**
 * Creates a device, a valid compute pipeline, valid resources for the pipeline, and
 * ties them together into a set of compute commands ready to be submitted to the GPU
 * queue. Does not submit the commands in order to make sure that all resources are
 * kept alive until the device is destroyed.
 *
 * @param t Test fixture used to track the device and buffers for cleanup.
 * @param adapter Adapter to request the device from.
 * @returns The device plus the (unsubmitted) command buffers that keep every created
 *   pipeline, bind group, and buffer reachable.
 */
async function createDeviceAndComputeCommands(t: Fixture, adapter: GPUAdapter) {
  // Constants are computed such that per run, this function should allocate roughly 2G
  // worth of data. This should be sufficient as we run these creation functions many
  // times. If the data backing the created objects is not recycled we should OOM.
  const limitInfo = getDefaultLimitsForCTS();
  const kNumPipelines = 64;
  const kNumBindgroups = 128;
  // One u32 per invocation of the (maxX, maxY) dispatch issued below.
  const kNumBufferElements =
    limitInfo.maxComputeWorkgroupSizeX.default * limitInfo.maxComputeWorkgroupSizeY.default;
  const kBufferSize = kNumBufferElements * 4; // 4 bytes per u32 element.
  const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const commands = [];

  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
    // Each pipeline compiles a distinct shader (the "${pipelineIndex}u" addend below
    // varies), so implementations cannot satisfy all iterations from a shader cache.
    // NOTE(review): the shader indexes with id.x * maxComputeWorkgroupSizeX as the row
    // stride while the dispatch extent in y is maxComputeWorkgroupSizeY; this stays
    // in-bounds only because the default X and Y limits are equal — confirm.
    const pipeline = device.createComputePipeline({
      layout: 'auto',
      compute: {
        module: device.createShaderModule({
          code: `
            struct Buffer { data: array<u32>, };

            @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
            @compute @workgroup_size(1) fn main(
                @builtin(global_invocation_id) id: vec3<u32>) {
              buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] =
                buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] +
                ${pipelineIndex}u;
            }
          `,
        }),
        entryPoint: 'main',
      },
    });
    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
      const buffer = t.trackForCleanup(
        device.createBuffer({
          size: kBufferSize,
          usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
        })
      );
      // Write out to the buffer to make sure that it has backing memory.
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });

      const encoder = device.createCommandEncoder();
      const pass = encoder.beginComputePass();
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      pass.dispatchWorkgroups(
        limitInfo.maxComputeWorkgroupSizeX.default,
        limitInfo.maxComputeWorkgroupSizeY.default
      );
      pass.end();
      // The finished (never-submitted) command buffer keeps the pipeline, bind group,
      // and buffer of this iteration alive until the device is destroyed.
      commands.push(encoder.finish());
    }
  }
  return { device, objects: commands };
}
/**
 * Creates a device, a valid render pipeline, valid resources for the pipeline, and
 * ties them together into a set of render commands ready to be submitted to the GPU
 * queue. Does not submit the commands in order to make sure that all resources are
 * kept alive until the device is destroyed.
 *
 * @param t Test fixture used to track the device, buffers, and textures for cleanup.
 * @param adapter Adapter to request the device from.
 * @returns The device plus the (unsubmitted) command buffers that keep every created
 *   pipeline, bind group, buffer, and texture reachable.
 */
async function createDeviceAndRenderCommands(t: Fixture, adapter: GPUAdapter) {
  // Constants are computed such that per run, this function should allocate roughly 2G
  // worth of data. This should be sufficient as we run these creation functions many
  // times. If the data backing the created objects is not recycled we should OOM.
  const kNumPipelines = 128;
  const kNumBindgroups = 128;
  const kSize = 128; // Side length, in texels, of the render target; also the uniform grid size.
  const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const commands = [];

  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
    // Each iteration compiles a distinct shader (the fragment color depends on
    // "${pipelineIndex}"), so pipelines cannot all be satisfied from a cache.
    // The vertex stage maps each index in the uniform buffer to a clip-space point;
    // the fragment stage writes a per-pipeline constant color.
    const module = device.createShaderModule({
      code: `
        struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, };

        @group(0) @binding(0) var<uniform> buffer: Buffer;
        @vertex fn vmain(
          @builtin(vertex_index) vertexIndex: u32
        ) -> @builtin(position) vec4<f32> {
          let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u];
          let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
          let r = vec2<f32>(1.0 / f32(${kSize}));
          let a = 2.0 * r;
          let b = r - vec2<f32>(1.0);
          return vec4<f32>(fma(position, a, b), 0.0, 1.0);
        }

        @fragment fn fmain() -> @location(0) vec4<f32> {
          return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0);
        }
      `,
    });
    const pipeline = device.createRenderPipeline({
      // Explicit layout (rather than 'auto') with a single uniform-buffer binding
      // visible to the vertex stage only.
      layout: device.createPipelineLayout({
        bindGroupLayouts: [
          device.createBindGroupLayout({
            entries: [
              {
                binding: 0,
                visibility: GPUShaderStage.VERTEX,
                buffer: { type: 'uniform' },
              },
            ],
          }),
        ],
      }),
      vertex: { module, entryPoint: 'vmain', buffers: [] },
      primitive: { topology: 'point-list' },
      fragment: {
        targets: [{ format: 'rgba8unorm' }],
        module,
        entryPoint: 'fmain',
      },
    });
    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
      const buffer = t.trackForCleanup(
        device.createBuffer({
          size: kSize * kSize * 4,
          usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
        })
      );
      // Write out to the buffer to make sure that it has backing memory.
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });
      const texture = t.trackForCleanup(
        device.createTexture({
          size: [kSize, kSize],
          usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
          format: 'rgba8unorm',
        })
      );

      const encoder = device.createCommandEncoder();
      const pass = encoder.beginRenderPass({
        colorAttachments: [
          {
            view: texture.createView(),
            loadOp: 'load',
            storeOp: 'store',
          },
        ],
      });
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      pass.draw(kSize * kSize);
      pass.end();
      // The finished (never-submitted) command buffer keeps the pipeline, bind group,
      // buffer, and texture of this iteration alive until the device is destroyed.
      commands.push(encoder.finish());
    }
  }
  return { device, objects: commands };
}

/**
 * Creates a device and a large number of buffers which are immediately written to. The
 * buffers are expected to be kept alive until they or the device are destroyed.
 *
 * @param t Test fixture used to track the device and buffers for cleanup.
 * @param adapter Adapter to request the device from.
 * @returns The device plus the list of live buffers backing ~2G of memory.
 */
async function createDeviceAndBuffers(t: Fixture, adapter: GPUAdapter) {
  // Currently we just allocate 2G of memory using 512MB blocks. We may be able to
  // increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause
  // kernel panics at the moment, and it can greatly increase the time needed.
  const kTotalMemorySize = 2 * 1024 * 1024 * 1024;
  const kMemoryBlockSize = 512 * 1024 * 1024;
  const kMemoryBlockData = new Uint8Array(kMemoryBlockSize);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const buffers = [];
  for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) {
    const buffer = t.trackForCleanup(
      device.createBuffer({
        size: kMemoryBlockSize,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
      })
    );

    // Write out to the buffer to make sure that it has backing memory.
    device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length);
    buffers.push(buffer);
  }
  return { device, objects: buffers };
}

g.test('coexisting')
  .desc(`Tests allocation of many coexisting GPUDevice objects.`)
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;
    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    assert(adapter !== null, 'Failed to get adapter.');

    // Based on Vulkan conformance test requirement to be able to create multiple devices.
    const kNumDevices = 5;

    // All devices are held in the array so they coexist; requestDeviceTracked handles
    // their eventual cleanup.
    const devices = [];
    for (let i = 0; i < kNumDevices; ++i) {
      const device = await t.requestDeviceTracked(adapter);
      devices.push(device);
    }
  });

g.test('continuous,with_destroy')
  .desc(
    `Tests allocation and destruction of many GPUDevice objects over time. Device objects
are sequentially requested with a series of device allocated objects created on each
device. The devices are then destroyed to verify that the device and the device allocated
objects are recycled over a very large number of iterations.`
  )
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;
    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    assert(adapter !== null, 'Failed to get adapter.');

    // Since devices are being destroyed, we should be able to create many devices.
    const kNumDevices = 100;
    // Rotate through the three allocation strategies so buffers, compute objects, and
    // render objects are all exercised across the run.
    const kFunctions = [
      createDeviceAndBuffers,
      createDeviceAndComputeCommands,
      createDeviceAndRenderCommands,
    ];

    // Deliberately keep the JS wrappers for every device and object alive: destroy()
    // alone must be enough for the implementation to reclaim backing resources,
    // independent of JS garbage collection.
    const deviceList = [];
    const objectLists = [];
    for (let i = 0; i < kNumDevices; ++i) {
      const { device, objects } = await kFunctions[i % kFunctions.length](t, adapter);
      t.expect(objects.length > 0, 'unable to allocate any objects');
      deviceList.push(device);
      objectLists.push(objects);
      device.destroy();
    }
  });

g.test('continuous,no_destroy')
  .desc(
    `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are
sequentially requested and dropped for GC over a very large number of iterations. Note
that without destroy, we do not create device allocated objects because that will
implicitly keep the device in scope.`
  )
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;
    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    assert(adapter !== null, 'Failed to get adapter.');

    const kNumDevices = 10_000;
    for (let i = 1; i <= kNumDevices; ++i) {
      // Each device is requested inside an immediately-invoked async function so no
      // reference survives the iteration and the device becomes collectable.
      await (async () => {
        // No trackForCleanup because it would prevent the GPUDevice from being GCed.
        // eslint-disable-next-line no-restricted-syntax
        t.expect((await adapter.requestDevice()) !== null, 'unexpected null device');
      })();
      if (i % 10 === 0) {
        // We need to occasionally wait for GC to clear out stale devices.
        await attemptGarbageCollection();
      }
    }
  });