tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

device_allocation.spec.ts (11912B)


/** Human-readable suite description consumed by the CTS test runner. */
export const description = `
Stress tests for GPUAdapter.requestDevice.
`;
      4 
      5 import { Fixture } from '../../common/framework/fixture.js';
      6 import { makeTestGroup } from '../../common/framework/test_group.js';
      7 import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
      8 import { keysOf } from '../../common/util/data_tables.js';
      9 import { getGPU } from '../../common/util/navigator_gpu.js';
     10 import { assert, iterRange } from '../../common/util/util.js';
     11 import { getDefaultLimitsForCTS } from '../../webgpu/capability_info.js';
     12 
     13 export const g = makeTestGroup(Fixture);
     14 
     15 /** Adapter preference identifier to option. */
     16 const kAdapterTypeOptions: {
     17  readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions;
     18 } =
     19  /* prettier-ignore */ {
     20  'low-power':        { powerPreference:        'low-power', forceFallbackAdapter: false },
     21  'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false },
     22  'fallback':         { powerPreference:          undefined, forceFallbackAdapter:  true },
     23 };
     24 /** List of all adapter hint types. */
     25 const kAdapterTypes = keysOf(kAdapterTypeOptions);
     26 
     27 /**
     28 * Creates a device, a valid compute pipeline, valid resources for the pipeline, and
     29 * ties them together into a set of compute commands ready to be submitted to the GPU
     30 * queue. Does not submit the commands in order to make sure that all resources are
     31 * kept alive until the device is destroyed.
     32 */
     33 async function createDeviceAndComputeCommands(t: Fixture, adapter: GPUAdapter) {
     34  // Constants are computed such that per run, this function should allocate roughly 2G
     35  // worth of data. This should be sufficient as we run these creation functions many
     36  // times. If the data backing the created objects is not recycled we should OOM.
     37  const limitInfo = getDefaultLimitsForCTS();
     38  const kNumPipelines = 64;
     39  const kNumBindgroups = 128;
     40  const kNumBufferElements =
     41    limitInfo.maxComputeWorkgroupSizeX.default * limitInfo.maxComputeWorkgroupSizeY.default;
     42  const kBufferSize = kNumBufferElements * 4;
     43  const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]);
     44 
     45  const device: GPUDevice = await t.requestDeviceTracked(adapter);
     46  const commands = [];
     47 
     48  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
     49    const pipeline = device.createComputePipeline({
     50      layout: 'auto',
     51      compute: {
     52        module: device.createShaderModule({
     53          code: `
     54              struct Buffer { data: array<u32>, };
     55 
     56              @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
     57              @compute @workgroup_size(1) fn main(
     58                  @builtin(global_invocation_id) id: vec3<u32>) {
     59                buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] =
     60                  buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] +
     61                    ${pipelineIndex}u;
     62              }
     63            `,
     64        }),
     65        entryPoint: 'main',
     66      },
     67    });
     68    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
     69      const buffer = t.trackForCleanup(
     70        device.createBuffer({
     71          size: kBufferSize,
     72          usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
     73        })
     74      );
     75      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
     76      const bindgroup = device.createBindGroup({
     77        layout: pipeline.getBindGroupLayout(0),
     78        entries: [{ binding: 0, resource: { buffer } }],
     79      });
     80 
     81      const encoder = device.createCommandEncoder();
     82      const pass = encoder.beginComputePass();
     83      pass.setPipeline(pipeline);
     84      pass.setBindGroup(0, bindgroup);
     85      pass.dispatchWorkgroups(
     86        limitInfo.maxComputeWorkgroupSizeX.default,
     87        limitInfo.maxComputeWorkgroupSizeY.default
     88      );
     89      pass.end();
     90      commands.push(encoder.finish());
     91    }
     92  }
     93  return { device, objects: commands };
     94 }
     95 
/**
 * Creates a device, a valid render pipeline, valid resources for the pipeline, and
 * ties them together into a set of render commands ready to be submitted to the GPU
 * queue. Does not submit the commands in order to make sure that all resources are
 * kept alive until the device is destroyed.
 */
async function createDeviceAndRenderCommands(t: Fixture, adapter: GPUAdapter) {
  // Constants are computed such that per run, this function should allocate roughly 2G
  // worth of data. This should be sufficient as we run these creation functions many
  // times. If the data backing the created objects is not recycled we should OOM.
  const kNumPipelines = 128;
  const kNumBindgroups = 128;
  const kSize = 128;
  // Identity data: element i holds i; the vertex shader maps each index to a point
  // position within the kSize x kSize render target.
  const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const commands = [];

  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
    // The interpolated `${pipelineIndex}` constant makes each shader module unique,
    // so no module or pipeline can be deduplicated/cached across iterations.
    const module = device.createShaderModule({
      code: `
          struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, };

          @group(0) @binding(0) var<uniform> buffer: Buffer;
          @vertex fn vmain(
            @builtin(vertex_index) vertexIndex: u32
          ) -> @builtin(position) vec4<f32> {
            let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u];
            let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
            let r = vec2<f32>(1.0 / f32(${kSize}));
            let a = 2.0 * r;
            let b = r - vec2<f32>(1.0);
            return vec4<f32>(fma(position, a, b), 0.0, 1.0);
          }

          @fragment fn fmain() -> @location(0) vec4<f32> {
            return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0);
          }
        `,
    });
    const pipeline = device.createRenderPipeline({
      // Explicit layout (rather than 'auto') pins the single uniform-buffer binding.
      layout: device.createPipelineLayout({
        bindGroupLayouts: [
          device.createBindGroupLayout({
            entries: [
              {
                binding: 0,
                visibility: GPUShaderStage.VERTEX,
                buffer: { type: 'uniform' },
              },
            ],
          }),
        ],
      }),
      vertex: { module, entryPoint: 'vmain', buffers: [] },
      primitive: { topology: 'point-list' },
      fragment: {
        targets: [{ format: 'rgba8unorm' }],
        module,
        entryPoint: 'fmain',
      },
    });
    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
      const buffer = t.trackForCleanup(
        device.createBuffer({
          size: kSize * kSize * 4,
          usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
        })
      );
      // Write real data so the implementation must actually back the buffer.
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });
      const texture = t.trackForCleanup(
        device.createTexture({
          size: [kSize, kSize],
          usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
          format: 'rgba8unorm',
        })
      );

      // Record (but never submit) one draw per bind group; the finished command
      // buffers keep every referenced object alive until the device is destroyed.
      const encoder = device.createCommandEncoder();
      const pass = encoder.beginRenderPass({
        colorAttachments: [
          {
            view: texture.createView(),
            loadOp: 'load',
            storeOp: 'store',
          },
        ],
      });
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      pass.draw(kSize * kSize);
      pass.end();
      commands.push(encoder.finish());
    }
  }
  return { device, objects: commands };
}
    197 
    198 /**
    199 * Creates a device and a large number of buffers which are immediately written to. The
    200 * buffers are expected to be kept alive until they or the device are destroyed.
    201 */
    202 async function createDeviceAndBuffers(t: Fixture, adapter: GPUAdapter) {
    203  // Currently we just allocate 2G of memory using 512MB blocks. We may be able to
    204  // increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause
    205  // kernel panics at the moment, and it can greatly increase the time needed.
    206  const kTotalMemorySize = 2 * 1024 * 1024 * 1024;
    207  const kMemoryBlockSize = 512 * 1024 * 1024;
    208  const kMemoryBlockData = new Uint8Array(kMemoryBlockSize);
    209 
    210  const device: GPUDevice = await t.requestDeviceTracked(adapter);
    211  const buffers = [];
    212  for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) {
    213    const buffer = t.trackForCleanup(
    214      device.createBuffer({
    215        size: kMemoryBlockSize,
    216        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
    217      })
    218    );
    219 
    220    // Write out to the buffer to make sure that it has backing memory.
    221    device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length);
    222    buffers.push(buffer);
    223  }
    224  return { device, objects: buffers };
    225 }
    226 
    227 g.test('coexisting')
    228  .desc(`Tests allocation of many coexisting GPUDevice objects.`)
    229  .params(u => u.combine('adapterType', kAdapterTypes))
    230  .fn(async t => {
    231    const { adapterType } = t.params;
    232    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    233    assert(adapter !== null, 'Failed to get adapter.');
    234 
    235    // Based on Vulkan conformance test requirement to be able to create multiple devices.
    236    const kNumDevices = 5;
    237 
    238    const devices = [];
    239    for (let i = 0; i < kNumDevices; ++i) {
    240      const device = await t.requestDeviceTracked(adapter);
    241      devices.push(device);
    242    }
    243  });
    244 
    245 g.test('continuous,with_destroy')
    246  .desc(
    247    `Tests allocation and destruction of many GPUDevice objects over time. Device objects
    248 are sequentially requested with a series of device allocated objects created on each
    249 device. The devices are then destroyed to verify that the device and the device allocated
    250 objects are recycled over a very large number of iterations.`
    251  )
    252  .params(u => u.combine('adapterType', kAdapterTypes))
    253  .fn(async t => {
    254    const { adapterType } = t.params;
    255    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    256    assert(adapter !== null, 'Failed to get adapter.');
    257 
    258    // Since devices are being destroyed, we should be able to create many devices.
    259    const kNumDevices = 100;
    260    const kFunctions = [
    261      createDeviceAndBuffers,
    262      createDeviceAndComputeCommands,
    263      createDeviceAndRenderCommands,
    264    ];
    265 
    266    const deviceList = [];
    267    const objectLists = [];
    268    for (let i = 0; i < kNumDevices; ++i) {
    269      const { device, objects } = await kFunctions[i % kFunctions.length](t, adapter);
    270      t.expect(objects.length > 0, 'unable to allocate any objects');
    271      deviceList.push(device);
    272      objectLists.push(objects);
    273      device.destroy();
    274    }
    275  });
    276 
    277 g.test('continuous,no_destroy')
    278  .desc(
    279    `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are
    280 sequentially requested and dropped for GC over a very large number of iterations. Note
    281 that without destroy, we do not create device allocated objects because that will
    282 implicitly keep the device in scope.`
    283  )
    284  .params(u => u.combine('adapterType', kAdapterTypes))
    285  .fn(async t => {
    286    const { adapterType } = t.params;
    287    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    288    assert(adapter !== null, 'Failed to get adapter.');
    289 
    290    const kNumDevices = 10_000;
    291    for (let i = 1; i <= kNumDevices; ++i) {
    292      await (async () => {
    293        // No trackForCleanup because it would prevent the GPUDevice from being GCed.
    294        // eslint-disable-next-line no-restricted-syntax
    295        t.expect((await adapter.requestDevice()) !== null, 'unexpected null device');
    296      })();
    297      if (i % 10 === 0) {
    298        // We need to occasionally wait for GC to clear out stale devices.
    299        await attemptGarbageCollection();
    300      }
    301    }
    302  });