Compare commits

...

2 Commits

Author SHA1 Message Date
ReinUsesLisp
622830f4e1 maxwell_3d: Use insert instead of loop push_back
This reduces the overhead of bounds checking on each element.
It won't reduce the cost of allocation because usually this vector's
capacity is usually large enough to hold whatever we push to it.
2020-11-11 19:52:19 -03:00
ReinUsesLisp
9ea8cffe35 maxwell_3d: Move code to separate functions
Deduplicate some code and put it in separate functions so it's easier to
understand and profile.
2020-11-11 19:52:19 -03:00
2 changed files with 122 additions and 151 deletions

View File

@@ -124,6 +124,112 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
}
macro_params.insert(macro_params.end(), base_start, base_start + amount);
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
}
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
shadow_state.reg_array[method] = argument;
return argument;
}
if (control == Regs::ShadowRamControl::Replay) {
return shadow_state.reg_array[method];
}
return argument;
}
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;
}
regs.reg_array[method] = argument;
for (const auto& table : dirty.tables) {
dirty.flags[table[method]] = true;
}
}
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
bool is_last_call) {
switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle):
return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(shadow_ram_control):
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
return;
case MAXWELL3D_REG_INDEX(macros.data):
return macro_engine->AddCode(regs.macros.upload_address, argument);
case MAXWELL3D_REG_INDEX(macros.bind):
return ProcessMacroBind(argument);
case MAXWELL3D_REG_INDEX(firmware[4]):
return ProcessFirmwareCall4();
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
return StartCBData(method);
case MAXWELL3D_REG_INDEX(cb_bind[0]):
return ProcessCBBind(0);
case MAXWELL3D_REG_INDEX(cb_bind[1]):
return ProcessCBBind(1);
case MAXWELL3D_REG_INDEX(cb_bind[2]):
return ProcessCBBind(2);
case MAXWELL3D_REG_INDEX(cb_bind[3]):
return ProcessCBBind(3);
case MAXWELL3D_REG_INDEX(cb_bind[4]):
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
return ProcessQueryGet();
case MAXWELL3D_REG_INDEX(condition.mode):
return ProcessQueryCondition();
case MAXWELL3D_REG_INDEX(counter_reset):
return ProcessCounterReset();
case MAXWELL3D_REG_INDEX(sync_info):
return ProcessSyncPoint();
case MAXWELL3D_REG_INDEX(exec_upload):
return upload_state.ProcessExec(regs.exec_upload.linear != 0);
case MAXWELL3D_REG_INDEX(data_upload):
upload_state.ProcessData(argument, is_last_call);
if (is_last_call) {
OnMemoryWrite();
}
return;
}
}
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -157,142 +263,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
}
macro_params.push_back(method_argument);
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
ProcessMacro(method, &method_argument, 1, is_last_call);
return;
}
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
u32 arg = method_argument;
// Keep track of the register value in shadow_state when requested.
if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
shadow_state.reg_array[method] = arg;
} else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) {
arg = shadow_state.reg_array[method];
}
if (regs.reg_array[method] != arg) {
regs.reg_array[method] = arg;
for (const auto& table : dirty.tables) {
dirty.flags[table[method]] = true;
}
}
switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle): {
rasterizer->WaitForIdle();
break;
}
case MAXWELL3D_REG_INDEX(shadow_ram_control): {
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument);
break;
}
case MAXWELL3D_REG_INDEX(macros.data): {
macro_engine->AddCode(regs.macros.upload_address, arg);
break;
}
case MAXWELL3D_REG_INDEX(macros.bind): {
ProcessMacroBind(arg);
break;
}
case MAXWELL3D_REG_INDEX(firmware[4]): {
ProcessFirmwareCall4();
break;
}
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
StartCBData(method);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[0]): {
ProcessCBBind(0);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[1]): {
ProcessCBBind(1);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[2]): {
ProcessCBBind(2);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[3]): {
ProcessCBBind(3);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[4]): {
ProcessCBBind(4);
break;
}
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
DrawArrays();
break;
}
case MAXWELL3D_REG_INDEX(clear_buffers): {
ProcessClearBuffers();
break;
}
case MAXWELL3D_REG_INDEX(query.query_get): {
ProcessQueryGet();
break;
}
case MAXWELL3D_REG_INDEX(condition.mode): {
ProcessQueryCondition();
break;
}
case MAXWELL3D_REG_INDEX(counter_reset): {
ProcessCounterReset();
break;
}
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
}
case MAXWELL3D_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case MAXWELL3D_REG_INDEX(data_upload): {
upload_state.ProcessData(arg, is_last_call);
if (is_last_call) {
OnMemoryWrite();
}
break;
}
default:
break;
}
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@@ -300,23 +280,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
}
for (std::size_t i = 0; i < amount; i++) {
macro_params.push_back(base_start[i]);
}
// Call the macro when there are no more parameters in the command buffer
if (amount == methods_pending) {
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
ProcessMacro(method, base_start, amount, amount == methods_pending);
return;
}
switch (method) {
@@ -335,15 +299,14 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
ProcessCBMultiData(method, base_start, amount);
break;
}
default: {
default:
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
}
break;
}
}

View File

@@ -1461,6 +1461,14 @@ public:
private:
void InitializeRegisterDefaults();
void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call);
u32 ProcessShadowRam(u32 method, u32 argument);
void ProcessDirtyRegisters(u32 method, u32 argument);
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
Core::System& system;
MemoryManager& memory_manager;