From a20bf80b050659466a1bd118d594103c20ec3e6d Mon Sep 17 00:00:00 2001 From: Jelle Raaijmakers Date: Sun, 8 May 2022 02:13:14 +0200 Subject: [PATCH] LibGL+LibGPU+LibSoftGPU: Implement point and line drawing Implement (anti)aliased point drawing and anti-aliased line drawing. Supported through LibGL's `GL_POINTS`, `GL_LINES`, `GL_LINE_LOOP` and `GL_LINE_STRIP`. In order to support this, `LibSoftGPU`'s rasterization logic was reworked. Now, any primitive can be drawn by invoking `rasterize()` which takes care of the quad loop and fragment testing logic. Three callbacks need to be passed: * `set_coverage_mask`: the primitive needs to provide initial coverage mask information so fragments can be discarded early. * `set_quad_depth`: fragments survived stencil testing, so depth values need to be set so depth testing can take place. * `set_quad_attributes`: fragments survived depth testing, so fragment shading is going to take place. All attributes like color, tex coords and fog depth need to be set so alpha testing and, eventually, fragment rasterization can take place. As of this commit, there are four instantiations of this function: * Triangle rasterization * Points - aliased * Points - anti-aliased * Lines - anti-aliased In order to standardize vertex processing for all primitive types, things like vertex transformation, lighting and tex coord generation are now taking place before clipping. 
--- Tests/LibGL/TestRender.cpp | 52 ++ Tests/LibGL/reference-images/0004_points.qoi | Bin 0 -> 170 bytes Tests/LibGL/reference-images/0005_lines.qoi | Bin 0 -> 2166 bytes Userland/Libraries/LibGL/GLContext.cpp | 27 +- Userland/Libraries/LibGPU/Device.h | 2 +- Userland/Libraries/LibGPU/Enums.h | 6 +- Userland/Libraries/LibSoftGPU/Clipper.cpp | 40 + Userland/Libraries/LibSoftGPU/Clipper.h | 2 + Userland/Libraries/LibSoftGPU/Device.cpp | 919 +++++++++++-------- Userland/Libraries/LibSoftGPU/Device.h | 16 +- Userland/Libraries/LibSoftGPU/PixelQuad.h | 23 +- Userland/Libraries/LibSoftGPU/SIMD.h | 13 + Userland/Libraries/LibSoftGPU/Triangle.h | 2 - 13 files changed, 712 insertions(+), 390 deletions(-) create mode 100644 Tests/LibGL/reference-images/0004_points.qoi create mode 100644 Tests/LibGL/reference-images/0005_lines.qoi diff --git a/Tests/LibGL/TestRender.cpp b/Tests/LibGL/TestRender.cpp index 640c54252f9..85604fae6a6 100644 --- a/Tests/LibGL/TestRender.cpp +++ b/Tests/LibGL/TestRender.cpp @@ -117,3 +117,55 @@ TEST_CASE(0003_rect_w_coordinate_regression) context->present(); expect_bitmap_equals_reference(context->frontbuffer(), "0003_rect_w_coordinate_regression"); } + +TEST_CASE(0004_points) +{ + auto context = create_testing_context(64, 64); + + // Aliased points + for (size_t i = 0; i < 3; ++i) { + glPointSize(1.f + i); + glBegin(GL_POINTS); + glVertex2f(-.5f + i * .5f, .5f); + glEnd(); + } + + // Anti-aliased points + glEnable(GL_POINT_SMOOTH); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + for (size_t i = 0; i < 3; ++i) { + glPointSize(3.f - i); + glBegin(GL_POINTS); + glVertex2f(-.5f + i * .5f, -.5f); + glEnd(); + } + + EXPECT_EQ(glGetError(), 0u); + + context->present(); + expect_bitmap_equals_reference(context->frontbuffer(), "0004_points"); +} + +TEST_CASE(0005_lines_antialiased) +{ + auto context = create_testing_context(64, 64); + + // Draw anti-aliased lines + glEnable(GL_LINE_SMOOTH); + glEnable(GL_BLEND); + 
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + glBegin(GL_LINES); + for (size_t i = 0; i < 6; ++i) { + glVertex2f(-.9f, .25f - i * .1f); + glVertex2f(.9f, .9f - i * .36f); + } + glEnd(); + + EXPECT_EQ(glGetError(), 0u); + + context->present(); + expect_bitmap_equals_reference(context->frontbuffer(), "0005_lines"); +} diff --git a/Tests/LibGL/reference-images/0004_points.qoi b/Tests/LibGL/reference-images/0004_points.qoi new file mode 100644 index 0000000000000000000000000000000000000000..20695549e53335c7b9052133b6217fb315fce886 GIT binary patch literal 170 zcmXTS&rD-rU~m9o7KXp5;LN}O|NkE}y`yG&M(qF)A2fXprBT(P^REA!I(4d=f$8b3 k9sla<>kUj_Lkv0%;(?eT+7xWM=^b>nXomd)x(oyu0jXbf2mk;8 literal 0 HcmV?d00001 diff --git a/Tests/LibGL/reference-images/0005_lines.qoi b/Tests/LibGL/reference-images/0005_lines.qoi new file mode 100644 index 0000000000000000000000000000000000000000..c95b8683524410c54036a5ffb6460171f5d334fe GIT binary patch literal 2166 zcma)8ZA?>F7%r|rWmQp%D>w#BK-hp4;m8U?@CSkX;z zpXYs^`%O_{o=T-k5nr`xb#=8Lv9YnJsHi}9cQ;$6(6UcEwneLW5xI>i3L(W6Jv)YJr7meJACfvl`7e%EL;;^xhpSX^90e}6v; z3k%`n<6|NlLPJAw?b}?9rG-t3{vE{p{QS3sOP4Oe&(DuN+lmVp zE+~B(9UaA+H*YX7FaVRugj1(ZAvidg{pzyQC+# z{Ce&$a7eO)pk`0+lG1H{xFD}C3Pf3188R|5pwsE9XnG?)1~!I}kPw_ceL56FLqkT4 zjEtbRwiY=#If#geV6)oL&9DhBq^73g>eZ{dWM8zkwc*N@D@aaGhDM`dzd50*Q6L?G z!C*jMULG178!3||f4bzva0?+RDG8S^Uq)+dE9U0rw!v&RD_lE#_%OV^z1a(YiR@fqg9r}~$GLOo zP*YQb$B!R_4+DmWhvO7ZaGo-&g>dA^5tNpe;{N^nSYBSPCfvJsPr*4MApvT&npp)h z3PK#I?Y*Qx$0x@VdcB@4bdrV4q%!kh6v0A`#0CPfva*7)u`x6>G;Gnc>304|JhVs0 zFEiDXhl%z`WFm=wMEs5~A*fzP?CB3YM{iGaUcybH-S!kmvRHyW;}w0$3kuo4L*!~i zaEq4cBZ>505(V1R5+%Od6|Fs1J+V8E7Ui`>CVdVs~p20(R^d8yiJXu~DICNutG(TNDwS_76Yc z6a1CdOz|SmAcD>#LZWSs*6_X#dfTzYX^nC5f^HuRu6?G+oX{V^wdMaqYmZi1PwG&%v{^X6VDF|V>JvAivQERZ zDE2b$awxCsF;(oqKB+I>FXvnrR6-Plf+jc6CSh3Es6UE7I7^U_c7G=c`eU=(C_<-I zBwV<_%*+fwILFsn?&chl>Lt8O&psfhzbo49yI!~z1No44&%WOv{MCx-I9bH(n$PH* zOy;>TCdJ9P70>n1feq#)Z*}-sv@et2Z+fzwI>sQyA7xk4D!b5VSs?2|?q!#-$!oBz 
ZGauQ}?{zN=veMK*#wk9;k0vg+{{Z;W%pm{( literal 0 HcmV?d00001 diff --git a/Userland/Libraries/LibGL/GLContext.cpp b/Userland/Libraries/LibGL/GLContext.cpp index 2056ae929e8..367074c4465 100644 --- a/Userland/Libraries/LibGL/GLContext.cpp +++ b/Userland/Libraries/LibGL/GLContext.cpp @@ -133,22 +133,8 @@ void GLContext::gl_end() // Make sure we had a `glBegin` before this call... RETURN_WITH_ERROR_IF(!m_in_draw_state, GL_INVALID_OPERATION); - m_in_draw_state = false; - // FIXME: Add support for the remaining primitive types. - if (m_current_draw_mode != GL_TRIANGLES - && m_current_draw_mode != GL_TRIANGLE_FAN - && m_current_draw_mode != GL_TRIANGLE_STRIP - && m_current_draw_mode != GL_QUADS - && m_current_draw_mode != GL_QUAD_STRIP - && m_current_draw_mode != GL_POLYGON) { - - m_vertex_list.clear_with_capacity(); - dbgln_if(GL_DEBUG, "gl_end(): draw mode {:#x} unsupported", m_current_draw_mode); - RETURN_WITH_ERROR_IF(true, GL_INVALID_ENUM); - } - Vector enabled_texture_units; for (size_t i = 0; i < m_texture_units.size(); ++i) { if (m_texture_units[i].texture_2d_enabled()) @@ -159,6 +145,18 @@ void GLContext::gl_end() GPU::PrimitiveType primitive_type; switch (m_current_draw_mode) { + case GL_LINE_LOOP: + primitive_type = GPU::PrimitiveType::LineLoop; + break; + case GL_LINE_STRIP: + primitive_type = GPU::PrimitiveType::LineStrip; + break; + case GL_LINES: + primitive_type = GPU::PrimitiveType::Lines; + break; + case GL_POINTS: + primitive_type = GPU::PrimitiveType::Points; + break; case GL_TRIANGLES: primitive_type = GPU::PrimitiveType::Triangles; break; @@ -178,7 +176,6 @@ void GLContext::gl_end() } m_rasterizer->draw_primitives(primitive_type, m_model_view_matrix, m_projection_matrix, m_texture_matrix, m_vertex_list, enabled_texture_units); - m_vertex_list.clear_with_capacity(); } diff --git a/Userland/Libraries/LibGPU/Device.h b/Userland/Libraries/LibGPU/Device.h index 39ae7262c40..43ad35da133 100644 --- a/Userland/Libraries/LibGPU/Device.h +++ 
b/Userland/Libraries/LibGPU/Device.h @@ -40,7 +40,7 @@ public: virtual DeviceInfo info() const = 0; - virtual void draw_primitives(PrimitiveType, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform, FloatMatrix4x4 const& texture_transform, Vector const& vertices, Vector const& enabled_texture_units) = 0; + virtual void draw_primitives(PrimitiveType, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform, FloatMatrix4x4 const& texture_transform, Vector& vertices, Vector const& enabled_texture_units) = 0; virtual void resize(Gfx::IntSize const& min_size) = 0; virtual void clear_color(FloatVector4 const&) = 0; virtual void clear_depth(DepthType) = 0; diff --git a/Userland/Libraries/LibGPU/Enums.h b/Userland/Libraries/LibGPU/Enums.h index 27d53e8f93b..3486fd03d47 100644 --- a/Userland/Libraries/LibGPU/Enums.h +++ b/Userland/Libraries/LibGPU/Enums.h @@ -88,9 +88,13 @@ enum class WindingOrder { }; enum class PrimitiveType { + Lines, + LineLoop, + LineStrip, + Points, + TriangleFan, Triangles, TriangleStrip, - TriangleFan, Quads, }; diff --git a/Userland/Libraries/LibSoftGPU/Clipper.cpp b/Userland/Libraries/LibSoftGPU/Clipper.cpp index 7115140dde3..e26b5ec707d 100644 --- a/Userland/Libraries/LibSoftGPU/Clipper.cpp +++ b/Userland/Libraries/LibSoftGPU/Clipper.cpp @@ -91,6 +91,46 @@ FLATTEN static constexpr void clip_plane(Vector& input_list, Vector } } +void Clipper::clip_points_against_frustum(Vector& vertices) +{ + m_vertex_buffer.clear_with_capacity(); + + for (auto& vertex : vertices) { + auto const coords = vertex.clip_coordinates; + if (point_within_clip_plane(coords) && point_within_clip_plane(coords) + && point_within_clip_plane(coords) && point_within_clip_plane(coords) + && point_within_clip_plane(coords) && point_within_clip_plane(coords)) + m_vertex_buffer.append(vertex); + } + + vertices.clear_with_capacity(); + vertices.extend(m_vertex_buffer); +} + +template +static constexpr bool 
constrain_line_within_plane(GPU::Vertex& from, GPU::Vertex& to) +{ + auto from_within_plane = point_within_clip_plane(from.clip_coordinates); + auto to_within_plane = point_within_clip_plane(to.clip_coordinates); + if (!from_within_plane && !to_within_plane) + return false; + if (!from_within_plane) + from = clip_intersection_point(from, to); + else if (!to_within_plane) + to = clip_intersection_point(from, to); + return true; +} + +bool Clipper::clip_line_against_frustum(GPU::Vertex& from, GPU::Vertex& to) +{ + return constrain_line_within_plane(from, to) + && constrain_line_within_plane(from, to) + && constrain_line_within_plane(from, to) + && constrain_line_within_plane(from, to) + && constrain_line_within_plane(from, to) + && constrain_line_within_plane(from, to); +} + void Clipper::clip_triangle_against_frustum(Vector& input_verts) { // FIXME C++23. Static reflection will provide looping over all enum values. diff --git a/Userland/Libraries/LibSoftGPU/Clipper.h b/Userland/Libraries/LibSoftGPU/Clipper.h index f944088331a..9484f3b7bd9 100644 --- a/Userland/Libraries/LibSoftGPU/Clipper.h +++ b/Userland/Libraries/LibSoftGPU/Clipper.h @@ -26,6 +26,8 @@ public: Clipper() = default; + void clip_points_against_frustum(Vector& vertices); + bool clip_line_against_frustum(GPU::Vertex& from, GPU::Vertex& to); void clip_triangle_against_frustum(Vector& input_vecs); private: diff --git a/Userland/Libraries/LibSoftGPU/Device.cpp b/Userland/Libraries/LibSoftGPU/Device.cpp index e9c17266d6c..b8d54482289 100644 --- a/Userland/Libraries/LibSoftGPU/Device.cpp +++ b/Userland/Libraries/LibSoftGPU/Device.cpp @@ -6,6 +6,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include @@ -180,96 +181,18 @@ void Device::setup_blend_factors() } } -void Device::rasterize_triangle(Triangle const& triangle) +template +ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_coverage_mask, CB2 set_quad_depth, CB3 set_quad_attributes) { - 
INCREASE_STATISTICS_COUNTER(g_num_rasterized_triangles, 1); - // Return if alpha testing is a no-op if (m_options.enable_alpha_test && m_options.alpha_test_func == GPU::AlphaTestFunction::Never) return; - auto const& vertex0 = triangle.vertices[0]; - auto const& vertex1 = triangle.vertices[1]; - auto const& vertex2 = triangle.vertices[2]; - - auto const& v0 = triangle.subpixel_coordinates[0]; - auto const& v1 = triangle.subpixel_coordinates[1]; - auto const& v2 = triangle.subpixel_coordinates[2]; - - auto const one_over_area = 1.0f / triangle.area; - - auto render_bounds = m_frame_buffer->rect(); - if (m_options.scissor_enabled) - render_bounds.intersect(m_options.scissor_box); - - // This function calculates the 3 edge values for the pixel relative to the triangle. - auto calculate_edge_values4 = [v0, v1, v2](Vector2 const& p) -> Vector3 { - return { - edge_function4(v1, v2, p), - edge_function4(v2, v0, p), - edge_function4(v0, v1, p), - }; - }; - - // Zero is used in testing against edge values below, applying the "top-left rule". If a pixel - // lies exactly on an edge shared by two triangles, we only render that pixel if the edge in - // question is a "top" or "left" edge. By setting either a 1 or 0, we effectively change the - // comparisons against the edge values below from "> 0" into ">= 0". - IntVector3 const zero { - (v2.y() < v1.y() || (v2.y() == v1.y() && v2.x() < v1.x())) ? 0 : 1, - (v0.y() < v2.y() || (v0.y() == v2.y() && v0.x() < v2.x())) ? 0 : 1, - (v1.y() < v0.y() || (v1.y() == v0.y() && v1.x() < v0.x())) ? 
0 : 1, - }; - - // This function tests whether a point as identified by its 3 edge values lies within the triangle - auto test_point4 = [zero](Vector3 const& edges) -> i32x4 { - return edges.x() >= zero.x() - && edges.y() >= zero.y() - && edges.z() >= zero.z(); - }; - - // Calculate block-based bounds - // clang-format off - int const bx0 = max(render_bounds.left(), min(min(v0.x(), v1.x()), v2.x()) / subpixel_factor) & ~1; - int const bx1 = (min(render_bounds.right(), max(max(v0.x(), v1.x()), v2.x()) / subpixel_factor) & ~1) + 2; - int const by0 = max(render_bounds.top(), min(min(v0.y(), v1.y()), v2.y()) / subpixel_factor) & ~1; - int const by1 = (min(render_bounds.bottom(), max(max(v0.y(), v1.y()), v2.y()) / subpixel_factor) & ~1) + 2; - // clang-format on - - // Calculate depth of fragment for fog; - // OpenGL 1.5 spec chapter 3.10: "An implementation may choose to approximate the - // eye-coordinate distance from the eye to each fragment center by |Ze|." - Vector3 fog_depth; - if (m_options.fog_enabled) { - fog_depth = { - expand4(abs(vertex0.eye_coordinates.z())), - expand4(abs(vertex1.eye_coordinates.z())), - expand4(abs(vertex2.eye_coordinates.z())), - }; - } - - auto const render_bounds_left = render_bounds.left(); - auto const render_bounds_right = render_bounds.right(); - auto const render_bounds_top = render_bounds.top(); - auto const render_bounds_bottom = render_bounds.bottom(); - - auto const half_pixel_offset = Vector2 { expand4(subpixel_factor / 2), expand4(subpixel_factor / 2) }; - + // Buffers auto color_buffer = m_frame_buffer->color_buffer(); auto depth_buffer = m_frame_buffer->depth_buffer(); auto stencil_buffer = m_frame_buffer->stencil_buffer(); - auto const window_z_coordinates = Vector3 { - expand4(vertex0.window_coordinates.z()), - expand4(vertex1.window_coordinates.z()), - expand4(vertex2.window_coordinates.z()), - }; - auto const window_w_coordinates = Vector3 { - expand4(vertex0.window_coordinates.w()), - 
expand4(vertex1.window_coordinates.w()), - expand4(vertex2.window_coordinates.w()), - }; - // Stencil configuration and writing auto const& stencil_configuration = m_stencil_configuration[GPU::Face::Front]; auto const stencil_reference_value = stencil_configuration.reference_value & stencil_configuration.test_mask; @@ -308,22 +231,33 @@ void Device::rasterize_triangle(Triangle const& triangle) store4_masked(stencil_value, stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], pixel_mask); }; - // Iterate over all blocks within the bounds of the triangle - for (int by = by0; by < by1; by += 2) { - for (int bx = bx0; bx < bx1; bx += 2) { - PixelQuad quad; + // Limit rendering to framebuffer and scissor rects + render_bounds.intersect(m_frame_buffer->rect()); + if (m_options.scissor_enabled) + render_bounds.intersect(m_options.scissor_box); + // Quad bounds + auto const render_bounds_left = render_bounds.left(); + auto const render_bounds_right = render_bounds.right(); + auto const render_bounds_top = render_bounds.top(); + auto const render_bounds_bottom = render_bounds.bottom(); + auto const qx0 = render_bounds_left & ~1; + auto const qx1 = render_bounds_right & ~1; + auto const qy0 = render_bounds_top & ~1; + auto const qy1 = render_bounds_bottom & ~1; + + // Rasterize all quads + // FIXME: this could be embarrassingly parallel + for (int qy = qy0; qy <= qy1; qy += 2) { + for (int qx = qx0; qx <= qx1; qx += 2) { + PixelQuad quad; quad.screen_coordinates = { - i32x4 { bx, bx + 1, bx, bx + 1 }, - i32x4 { by, by, by + 1, by + 1 }, + i32x4 { qx, qx + 1, qx, qx + 1 }, + i32x4 { qy, qy, qy + 1, qy + 1 }, }; - auto edge_values = calculate_edge_values4(quad.screen_coordinates * subpixel_factor + half_pixel_offset); - - // Generate triangle coverage mask - quad.mask = test_point4(edge_values); - - // Test quad against intersection of render target size and scissor rect + // Set coverage mask and test against render bounds + set_coverage_mask(quad); quad.mask &= 
quad.screen_coordinates.x() >= render_bounds_left && quad.screen_coordinates.x() <= render_bounds_right && quad.screen_coordinates.y() >= render_bounds_top @@ -339,10 +273,10 @@ void Device::rasterize_triangle(Triangle const& triangle) GPU::StencilType* stencil_ptrs[4]; i32x4 stencil_value; if (m_options.enable_stencil_test) { - stencil_ptrs[0] = coverage_bits & 1 ? &stencil_buffer->scanline(by)[bx] : nullptr; - stencil_ptrs[1] = coverage_bits & 2 ? &stencil_buffer->scanline(by)[bx + 1] : nullptr; - stencil_ptrs[2] = coverage_bits & 4 ? &stencil_buffer->scanline(by + 1)[bx] : nullptr; - stencil_ptrs[3] = coverage_bits & 8 ? &stencil_buffer->scanline(by + 1)[bx + 1] : nullptr; + stencil_ptrs[0] = coverage_bits & 1 ? &stencil_buffer->scanline(qy)[qx] : nullptr; + stencil_ptrs[1] = coverage_bits & 2 ? &stencil_buffer->scanline(qy)[qx + 1] : nullptr; + stencil_ptrs[2] = coverage_bits & 4 ? &stencil_buffer->scanline(qy + 1)[qx] : nullptr; + stencil_ptrs[3] = coverage_bits & 8 ? &stencil_buffer->scanline(qy + 1)[qx + 1] : nullptr; stencil_value = load4_masked(stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], quad.mask); stencil_value &= stencil_configuration.test_mask; @@ -393,28 +327,20 @@ void Device::rasterize_triangle(Triangle const& triangle) continue; } - // Calculate barycentric coordinates from previously calculated edge values - quad.barycentrics = Vector3 { - to_f32x4(edge_values.x()), - to_f32x4(edge_values.y()), - to_f32x4(edge_values.z()), - } * one_over_area; - // Depth testing GPU::DepthType* depth_ptrs[4] = { - coverage_bits & 1 ? &depth_buffer->scanline(by)[bx] : nullptr, - coverage_bits & 2 ? &depth_buffer->scanline(by)[bx + 1] : nullptr, - coverage_bits & 4 ? &depth_buffer->scanline(by + 1)[bx] : nullptr, - coverage_bits & 8 ? &depth_buffer->scanline(by + 1)[bx + 1] : nullptr, + coverage_bits & 1 ? &depth_buffer->scanline(qy)[qx] : nullptr, + coverage_bits & 2 ? &depth_buffer->scanline(qy)[qx + 1] : nullptr, + coverage_bits & 4 ? 
&depth_buffer->scanline(qy + 1)[qx] : nullptr, + coverage_bits & 8 ? &depth_buffer->scanline(qy + 1)[qx + 1] : nullptr, }; if (m_options.enable_depth_test) { - auto depth = load4_masked(depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask); - - quad.depth = window_z_coordinates.dot(quad.barycentrics); + set_quad_depth(quad); // FIXME: Also apply depth_offset_factor which depends on the depth gradient if (m_options.depth_offset_enabled) quad.depth += m_options.depth_offset_constant * NumericLimits::epsilon(); + auto depth = load4_masked(depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask); i32x4 depth_test_passed; switch (m_options.depth_func) { case GPU::DepthTestFunction::Always: @@ -505,22 +431,7 @@ void Device::rasterize_triangle(Triangle const& triangle) INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, maskcount(quad.mask)); - // Draw the pixels according to the previously generated mask - auto const interpolated_reciprocal_w = window_w_coordinates.dot(quad.barycentrics); - quad.barycentrics = quad.barycentrics * window_w_coordinates / interpolated_reciprocal_w; - - // FIXME: make this more generic. 
We want to interpolate more than just color and uv - if (m_options.shade_smooth) - quad.vertex_color = interpolate(expand4(vertex0.color), expand4(vertex1.color), expand4(vertex2.color), quad.barycentrics); - else - quad.vertex_color = expand4(vertex0.color); - - for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) - quad.texture_coordinates[i] = interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics); - - if (m_options.fog_enabled) - quad.fog_depth = fog_depth.dot(quad.barycentrics); - + set_quad_attributes(quad); shade_fragments(quad); if (m_options.enable_alpha_test && m_options.alpha_test_func != GPU::AlphaTestFunction::Always && !test_alpha(quad)) @@ -535,10 +446,10 @@ void Device::rasterize_triangle(Triangle const& triangle) continue; GPU::ColorType* color_ptrs[4] = { - coverage_bits & 1 ? &color_buffer->scanline(by)[bx] : nullptr, - coverage_bits & 2 ? &color_buffer->scanline(by)[bx + 1] : nullptr, - coverage_bits & 4 ? &color_buffer->scanline(by + 1)[bx] : nullptr, - coverage_bits & 8 ? &color_buffer->scanline(by + 1)[bx + 1] : nullptr, + coverage_bits & 1 ? &color_buffer->scanline(qy)[qx] : nullptr, + coverage_bits & 2 ? &color_buffer->scanline(qy)[qx + 1] : nullptr, + coverage_bits & 4 ? &color_buffer->scanline(qy + 1)[qx] : nullptr, + coverage_bits & 8 ? 
&color_buffer->scanline(qy + 1)[qx + 1] : nullptr, }; u32x4 dst_u32; @@ -549,7 +460,7 @@ void Device::rasterize_triangle(Triangle const& triangle) INCREASE_STATISTICS_COUNTER(g_num_pixels_blended, maskcount(quad.mask)); // Blend color values from pixel_staging into color_buffer - Vector4 const& src = quad.out_color; + auto const& src = quad.out_color; auto dst = to_vec4(dst_u32); auto src_factor = expand4(m_alpha_blend_factors.src_constant) @@ -575,6 +486,299 @@ void Device::rasterize_triangle(Triangle const& triangle) } } +void Device::rasterize_line_aliased(GPU::Vertex& from, GPU::Vertex& to) +{ + // FIXME: implement aliased lines; for now we fall back to anti-aliased logic + rasterize_line_antialiased(from, to); +} + +void Device::rasterize_line_antialiased(GPU::Vertex& from, GPU::Vertex& to) +{ + auto const from_coords = from.window_coordinates.xy(); + auto const to_coords = to.window_coordinates.xy(); + auto const line_width = ceilf(m_options.line_width); + auto const line_radius = line_width / 2; + + auto render_bounds = Gfx::IntRect { + min(from_coords.x(), to_coords.x()), + min(from_coords.y(), to_coords.y()), + abs(from_coords.x() - to_coords.x()) + 1, + abs(from_coords.y() - to_coords.y()) + 1, + }; + render_bounds.inflate(line_width, line_width); + + auto const from_coords4 = expand4(from_coords); + auto const line_vector = to_coords - from_coords; + auto const line_vector4 = expand4(line_vector); + auto const line_dot4 = expand4(line_vector.dot(line_vector)); + + auto const from_depth4 = expand4(from.window_coordinates.z()); + auto const to_depth4 = expand4(to.window_coordinates.z()); + + auto const from_color4 = expand4(from.color); + auto const from_fog_depth4 = expand4(abs(from.eye_coordinates.z())); + + // Rasterize using a 2D signed distance field for a line segment + // FIXME: performance-wise, this might be the absolute worst way to draw an anti-aliased line + f32x4 distance_along_line; + rasterize( + render_bounds, + [&from_coords4, 
&distance_along_line, &line_vector4, &line_dot4, &line_radius](auto& quad) { + auto const screen_coordinates4 = to_vec2_f32x4(quad.screen_coordinates); + auto const pixel_vector = screen_coordinates4 - from_coords4; + distance_along_line = AK::SIMD::clamp(pixel_vector.dot(line_vector4) / line_dot4, 0.f, 1.f); + auto distance_to_line = length(pixel_vector - line_vector4 * distance_along_line) - line_radius; + + // Add .5f to the distance so coverage transitions half a pixel before the actual border + quad.coverage = 1.f - AK::SIMD::clamp(distance_to_line + 0.5f, 0.f, 1.f); + quad.mask = quad.coverage > 0.f; + }, + [&from_depth4, &to_depth4, &distance_along_line](auto& quad) { + quad.depth = mix(from_depth4, to_depth4, distance_along_line); + }, + [&from_color4, &from, &from_fog_depth4](auto& quad) { + // FIXME: interpolate color, tex coords and fog depth along the distance of the line + // in clip space (i.e. NOT distance_from_line) + quad.vertex_color = from_color4; + for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) + quad.texture_coordinates[i] = expand4(from.tex_coords[i]); + quad.fog_depth = from_fog_depth4; + }); +} + +void Device::rasterize_line(GPU::Vertex& from, GPU::Vertex& to) +{ + if (m_options.line_smooth) + rasterize_line_antialiased(from, to); + else + rasterize_line_aliased(from, to); +} + +void Device::rasterize_point_aliased(GPU::Vertex& point) +{ + // Determine aliased point width + constexpr size_t maximum_aliased_point_size = 64; + auto point_width = clamp(round_to(m_options.point_size), 1, maximum_aliased_point_size); + + // Determine aliased center coordinates + IntVector2 point_center; + if (point_width % 2 == 1) + point_center = point.window_coordinates.xy().to_type(); + else + point_center = (point.window_coordinates.xy() + FloatVector2 { .5f, .5f }).to_type(); + + // Aliased points are rects; calculate boundaries around center + auto point_rect = Gfx::IntRect { + point_center.x() - point_width / 2, + point_center.y() - point_width / 2, + 
point_width, + point_width, + }; + + // Rasterize the point as a rect + rasterize( + point_rect, + [](auto& quad) { + // We already passed in point_rect, so this doesn't matter + quad.mask = expand4(~0); + }, + [&point](auto& quad) { + quad.depth = expand4(point.window_coordinates.z()); + }, + [&point](auto& quad) { + quad.vertex_color = expand4(point.color); + for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) + quad.texture_coordinates[i] = expand4(point.tex_coords[i]); + quad.fog_depth = expand4(abs(point.eye_coordinates.z())); + }); +} + +void Device::rasterize_point_antialiased(GPU::Vertex& point) +{ + auto const center = point.window_coordinates.xy(); + auto const center4 = expand4(center); + auto const radius = m_options.point_size / 2; + + auto render_bounds = Gfx::IntRect { + center.x() - radius, + center.y() - radius, + radius * 2 + 1, + radius * 2 + 1, + }; + + // Rasterize using a 2D signed distance field for a circle + rasterize( + render_bounds, + [&center4, &radius](auto& quad) { + auto screen_coords = to_vec2_f32x4(quad.screen_coordinates); + auto distance_to_point = length(center4 - screen_coords) - radius; + + // Add .5f to the distance so coverage transitions half a pixel before the actual border + quad.coverage = 1.f - AK::SIMD::clamp(distance_to_point + .5f, 0.f, 1.f); + quad.mask = quad.coverage > 0.f; + }, + [&point](auto& quad) { + quad.depth = expand4(point.window_coordinates.z()); + }, + [&point](auto& quad) { + quad.vertex_color = expand4(point.color); + for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) + quad.texture_coordinates[i] = expand4(point.tex_coords[i]); + quad.fog_depth = expand4(abs(point.eye_coordinates.z())); + }); +} + +void Device::rasterize_point(GPU::Vertex& point) +{ + // Divide texture coordinates R, S and T by Q + for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) { + auto& tex_coord = point.tex_coords[i]; + auto one_over_w = 1 / tex_coord.w(); + tex_coord = { + tex_coord.x() * one_over_w, + tex_coord.y() * one_over_w, + 
tex_coord.z() * one_over_w, + tex_coord.w(), + }; + } + + if (m_options.point_smooth) + rasterize_point_antialiased(point); + else + rasterize_point_aliased(point); +} + +void Device::rasterize_triangle(Triangle& triangle) +{ + INCREASE_STATISTICS_COUNTER(g_num_rasterized_triangles, 1); + + auto v0 = (triangle.vertices[0].window_coordinates.xy() * subpixel_factor).to_rounded(); + auto v1 = (triangle.vertices[1].window_coordinates.xy() * subpixel_factor).to_rounded(); + auto v2 = (triangle.vertices[2].window_coordinates.xy() * subpixel_factor).to_rounded(); + + auto triangle_area = edge_function(v0, v1, v2); + if (triangle_area == 0) + return; + + // Perform face culling + if (m_options.enable_culling) { + bool is_front = (m_options.front_face == GPU::WindingOrder::CounterClockwise ? triangle_area > 0 : triangle_area < 0); + + if (!is_front && m_options.cull_back) + return; + + if (is_front && m_options.cull_front) + return; + } + + // Force counter-clockwise ordering of vertices + if (triangle_area < 0) { + swap(triangle.vertices[0], triangle.vertices[1]); + swap(v0, v1); + triangle_area *= -1; + } + + auto const& vertex0 = triangle.vertices[0]; + auto const& vertex1 = triangle.vertices[1]; + auto const& vertex2 = triangle.vertices[2]; + + auto const one_over_area = 1.0f / triangle_area; + + // This function calculates the 3 edge values for the pixel relative to the triangle. + auto calculate_edge_values4 = [v0, v1, v2](Vector2 const& p) -> Vector3 { + return { + edge_function4(v1, v2, p), + edge_function4(v2, v0, p), + edge_function4(v0, v1, p), + }; + }; + + // Zero is used in testing against edge values below, applying the "top-left rule". If a pixel + // lies exactly on an edge shared by two triangles, we only render that pixel if the edge in + // question is a "top" or "left" edge. By setting either a 1 or 0, we effectively change the + // comparisons against the edge values below from "> 0" into ">= 0". 
+ IntVector3 const zero { + (v2.y() < v1.y() || (v2.y() == v1.y() && v2.x() < v1.x())) ? 0 : 1, + (v0.y() < v2.y() || (v0.y() == v2.y() && v0.x() < v2.x())) ? 0 : 1, + (v1.y() < v0.y() || (v1.y() == v0.y() && v1.x() < v0.x())) ? 0 : 1, + }; + + // This function tests whether a point as identified by its 3 edge values lies within the triangle + auto test_point4 = [zero](Vector3 const& edges) -> i32x4 { + return edges.x() >= zero.x() + && edges.y() >= zero.y() + && edges.z() >= zero.z(); + }; + + // Calculate render bounds based on the triangle's vertices + Gfx::IntRect render_bounds; + render_bounds.set_left(min(min(v0.x(), v1.x()), v2.x()) / subpixel_factor); + render_bounds.set_right(max(max(v0.x(), v1.x()), v2.x()) / subpixel_factor); + render_bounds.set_top(min(min(v0.y(), v1.y()), v2.y()) / subpixel_factor); + render_bounds.set_bottom(max(max(v0.y(), v1.y()), v2.y()) / subpixel_factor); + + // Calculate depth of fragment for fog; + // OpenGL 1.5 chapter 3.10: "An implementation may choose to approximate the + // eye-coordinate distance from the eye to each fragment center by |Ze|." 
+ Vector3 fog_depth; + if (m_options.fog_enabled) { + fog_depth = { + expand4(abs(vertex0.eye_coordinates.z())), + expand4(abs(vertex1.eye_coordinates.z())), + expand4(abs(vertex2.eye_coordinates.z())), + }; + } + + auto const half_pixel_offset = Vector2 { expand4(subpixel_factor / 2), expand4(subpixel_factor / 2) }; + + auto const window_z_coordinates = Vector3 { + expand4(vertex0.window_coordinates.z()), + expand4(vertex1.window_coordinates.z()), + expand4(vertex2.window_coordinates.z()), + }; + auto const window_w_coordinates = Vector3 { + expand4(vertex0.window_coordinates.w()), + expand4(vertex1.window_coordinates.w()), + expand4(vertex2.window_coordinates.w()), + }; + + rasterize( + render_bounds, + [&](auto& quad) { + auto edge_values = calculate_edge_values4(quad.screen_coordinates * subpixel_factor + half_pixel_offset); + quad.mask = test_point4(edge_values); + + quad.barycentrics = { + to_f32x4(edge_values.x()), + to_f32x4(edge_values.y()), + to_f32x4(edge_values.z()), + }; + }, + [&one_over_area, &window_z_coordinates](auto& quad) { + // Determine each edge's ratio to the total area + quad.barycentrics = quad.barycentrics * one_over_area; + + // Because the Z coordinates were divided by W, we can interpolate between them + quad.depth = window_z_coordinates.dot(quad.barycentrics); + }, + [&](auto& quad) { + auto const interpolated_reciprocal_w = window_w_coordinates.dot(quad.barycentrics); + quad.barycentrics = quad.barycentrics * window_w_coordinates / interpolated_reciprocal_w; + + // FIXME: make this more generic. 
We want to interpolate more than just color and uv + if (m_options.shade_smooth) + quad.vertex_color = interpolate(expand4(vertex0.color), expand4(vertex1.color), expand4(vertex2.color), quad.barycentrics); + else + quad.vertex_color = expand4(vertex0.color); + + for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) + quad.texture_coordinates[i] = interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics); + + if (m_options.fog_enabled) + quad.fog_depth = fog_depth.dot(quad.barycentrics); + }); +} + Device::Device(Gfx::IntSize const& size) : m_frame_buffer(FrameBuffer::try_create(size).release_value_but_fixme_should_propagate_errors()) { @@ -644,8 +848,127 @@ static void generate_texture_coordinates(GPU::Vertex& vertex, GPU::RasterizerOpt } } +void Device::calculate_vertex_lighting(GPU::Vertex& vertex) const +{ + if (!m_options.lighting_enabled) + return; + + auto const& material = m_materials.at(0); + auto ambient = material.ambient; + auto diffuse = material.diffuse; + auto emissive = material.emissive; + auto specular = material.specular; + + if (m_options.color_material_enabled + && (m_options.color_material_face == GPU::ColorMaterialFace::Front || m_options.color_material_face == GPU::ColorMaterialFace::FrontAndBack)) { + switch (m_options.color_material_mode) { + case GPU::ColorMaterialMode::Ambient: + ambient = vertex.color; + break; + case GPU::ColorMaterialMode::AmbientAndDiffuse: + ambient = vertex.color; + diffuse = vertex.color; + break; + case GPU::ColorMaterialMode::Diffuse: + diffuse = vertex.color; + break; + case GPU::ColorMaterialMode::Emissive: + emissive = vertex.color; + break; + case GPU::ColorMaterialMode::Specular: + specular = vertex.color; + break; + } + } + + FloatVector4 result_color = emissive + ambient * m_lighting_model.scene_ambient_color; + + for (auto const& light : m_lights) { + if (!light.is_enabled) + continue; + + // We need to save the length here because the 
attenuation factor requires a non-normalized vector! + auto sgi_arrow_operator = [](FloatVector4 const& p1, FloatVector4 const& p2, float& output_length) { + FloatVector3 light_vector; + if ((p1.w() != 0.f) && (p2.w() == 0.f)) + light_vector = p2.xyz(); + else if ((p1.w() == 0.f) && (p2.w() != 0.f)) + light_vector = -p1.xyz(); + else + light_vector = p2.xyz() - p1.xyz(); + + output_length = light_vector.length(); + if (output_length == 0.f) + return light_vector; + return light_vector / output_length; + }; + + auto sgi_dot_operator = [](FloatVector3 const& d1, FloatVector3 const& d2) { + return AK::max(d1.dot(d2), 0.0f); + }; + + float vertex_to_light_length = 0.f; + FloatVector3 vertex_to_light = sgi_arrow_operator(vertex.eye_coordinates, light.position, vertex_to_light_length); + + // Light attenuation value. + float light_attenuation_factor = 1.0f; + if (light.position.w() != 0.0f) + light_attenuation_factor = 1.0f / (light.constant_attenuation + (light.linear_attenuation * vertex_to_light_length) + (light.quadratic_attenuation * vertex_to_light_length * vertex_to_light_length)); + + // Spotlight factor + float spotlight_factor = 1.0f; + if (light.spotlight_cutoff_angle != 180.0f) { + auto const vertex_to_light_dot_spotlight_direction = sgi_dot_operator(vertex_to_light, light.spotlight_direction.normalized()); + auto const cos_spotlight_cutoff = AK::cos(light.spotlight_cutoff_angle * AK::Pi / 180.f); + + if (vertex_to_light_dot_spotlight_direction >= cos_spotlight_cutoff) + spotlight_factor = AK::pow(vertex_to_light_dot_spotlight_direction, light.spotlight_exponent); + else + spotlight_factor = 0.0f; + } + + // FIXME: The spec allows for splitting the colors calculated here into multiple different colors (primary/secondary color). Investigate what this means. 
+ (void)m_lighting_model.color_control; + + // FIXME: Two sided lighting should be implemented eventually (I believe this is where the normals are -ve and then lighting is calculated with the BACK material) + (void)m_lighting_model.two_sided_lighting; + + // Ambient + auto const ambient_component = ambient * light.ambient_intensity; + + // Diffuse + auto const normal_dot_vertex_to_light = sgi_dot_operator(vertex.normal, vertex_to_light); + auto const diffuse_component = diffuse * light.diffuse_intensity * normal_dot_vertex_to_light; + + // Specular + FloatVector4 specular_component = { 0.0f, 0.0f, 0.0f, 0.0f }; + if (normal_dot_vertex_to_light > 0.0f) { + FloatVector3 half_vector_normalized; + if (!m_lighting_model.viewer_at_infinity) { + half_vector_normalized = vertex_to_light + FloatVector3(0.0f, 0.0f, 1.0f); + } else { + auto const vertex_to_eye_point = sgi_arrow_operator(vertex.eye_coordinates, { 0.f, 0.f, 0.f, 1.f }, vertex_to_light_length); + half_vector_normalized = vertex_to_light + vertex_to_eye_point; + } + half_vector_normalized.normalize(); + + auto const normal_dot_half_vector = sgi_dot_operator(vertex.normal, half_vector_normalized); + auto const specular_coefficient = AK::pow(normal_dot_half_vector, material.shininess); + specular_component = specular * light.specular_intensity * specular_coefficient; + } + + auto color = ambient_component + diffuse_component + specular_component; + color = color * light_attenuation_factor * spotlight_factor; + result_color += color; + } + + vertex.color = result_color; + vertex.color.set_w(diffuse.w()); // OpenGL 1.5 spec, page 59: "The A produced by lighting is the alpha value associated with diffuse color material" + vertex.color.clamp(0.0f, 1.0f); +} + void Device::draw_primitives(GPU::PrimitiveType primitive_type, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform, - FloatMatrix4x4 const& texture_transform, Vector const& vertices, Vector const& enabled_texture_units) + 
FloatMatrix4x4 const& texture_transform, Vector& vertices, Vector const& enabled_texture_units) { // At this point, the user has effectively specified that they are done with defining the geometry // of what they want to draw. We now need to do a few things (https://www.khronos.org/opengl/wiki/Rendering_Pipeline_Overview): @@ -654,15 +977,106 @@ void Device::draw_primitives(GPU::PrimitiveType primitive_type, FloatMatrix4x4 c // 2. Transform all of the vertices from eye space into clip space by multiplying by the projection matrix // 3. If culling is enabled, we cull the desired faces (https://learnopengl.com/Advanced-OpenGL/Face-culling) // 4. Each element of the vertex is then divided by w to bring the positions into NDC (Normalized Device Coordinates) - // 5. The vertices are sorted (for the rasterizer, how are we doing this? 3Dfx did this top to bottom in terms of vertex y coordinates) - // 6. The vertices are then sent off to the rasterizer and drawn to the screen + // 5. The triangle's vertices are sorted in a counter-clockwise orientation + // 6. 
The triangles are then sent off to the rasterizer and drawn to the screen + + if (vertices.is_empty()) + return; m_enabled_texture_units = enabled_texture_units; - m_triangle_list.clear_with_capacity(); - m_processed_triangles.clear_with_capacity(); + // Set up normals transform by taking the upper left 3x3 elements from the model view matrix + // See section 2.11.3 of the OpenGL 1.5 spec + auto const normal_transform = model_view_transform.submatrix_from_topleft<3>().transpose().inverse(); + + // Generate texture coordinates if at least one coordinate is enabled + bool texture_coordinate_generation_enabled = any_of( + m_options.texcoord_generation_enabled_coordinates, + [](auto coordinates_enabled) { return coordinates_enabled != GPU::TexCoordGenerationCoordinate::None; }); + + // First, transform all vertices + for (auto& vertex : vertices) { + vertex.eye_coordinates = model_view_transform * vertex.position; + + vertex.normal = normal_transform * vertex.normal; + if (m_options.normalization_enabled) + vertex.normal.normalize(); + + calculate_vertex_lighting(vertex); + + vertex.clip_coordinates = projection_transform * vertex.eye_coordinates; + + if (texture_coordinate_generation_enabled) + generate_texture_coordinates(vertex, m_options); + + for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) + vertex.tex_coords[i] = texture_transform * vertex.tex_coords[i]; + } + + // Window coordinate calculation + auto const viewport = m_options.viewport; + auto const viewport_half_width = viewport.width() / 2.f; + auto const viewport_half_height = viewport.height() / 2.f; + auto const viewport_center_x = viewport.x() + viewport_half_width; + auto const viewport_center_y = viewport.y() + viewport_half_height; + auto const depth_half_range = (m_options.depth_max - m_options.depth_min) / 2; + auto const depth_halfway = (m_options.depth_min + m_options.depth_max) / 2; + + auto calculate_vertex_window_coordinates = [&](GPU::Vertex& vertex) { + auto const one_over_w = 1 / 
vertex.clip_coordinates.w(); + auto const ndc_coordinates = vertex.clip_coordinates.xyz() * one_over_w; + + vertex.window_coordinates = { + viewport_center_x + ndc_coordinates.x() * viewport_half_width, + viewport_center_y + ndc_coordinates.y() * viewport_half_height, + depth_halfway + ndc_coordinates.z() * depth_half_range, + one_over_w, + }; + }; + + // Process points + if (primitive_type == GPU::PrimitiveType::Points) { + m_clipper.clip_points_against_frustum(vertices); + for (auto& vertex : vertices) { + calculate_vertex_window_coordinates(vertex); + rasterize_point(vertex); + } + return; + } + + // Process lines, line loop and line strips + auto rasterize_line_segment = [&](GPU::Vertex& from, GPU::Vertex& to) { + if (!m_clipper.clip_line_against_frustum(from, to)) + return; + + calculate_vertex_window_coordinates(from); + calculate_vertex_window_coordinates(to); + + rasterize_line(from, to); + }; + if (primitive_type == GPU::PrimitiveType::Lines) { + if (vertices.size() < 2) + return; + for (size_t i = 0; i < vertices.size() - 1; i += 2) + rasterize_line_segment(vertices[i], vertices[i + 1]); + return; + } else if (primitive_type == GPU::PrimitiveType::LineLoop) { + if (vertices.size() < 2) + return; + for (size_t i = 0; i < vertices.size(); ++i) + rasterize_line_segment(vertices[i], vertices[(i + 1) % vertices.size()]); + return; + } else if (primitive_type == GPU::PrimitiveType::LineStrip) { + if (vertices.size() < 2) + return; + for (size_t i = 0; i < vertices.size() - 1; ++i) + rasterize_line_segment(vertices[i], vertices[i + 1]); + return; + } // Let's construct some triangles + m_triangle_list.clear_with_capacity(); + m_processed_triangles.clear_with_capacity(); if (primitive_type == GPU::PrimitiveType::Triangles) { Triangle triangle; if (vertices.size() < 3) @@ -720,163 +1134,8 @@ void Device::draw_primitives(GPU::PrimitiveType primitive_type, FloatMatrix4x4 c } } - // Set up normals transform by taking the upper left 3x3 elements from the model view 
matrix - // See section 2.11.3 of the OpenGL 1.5 spec - auto normal_transform = model_view_transform.submatrix_from_topleft<3>().transpose().inverse(); - - // Now let's transform each triangle and send that to the GPU - auto const viewport = m_options.viewport; - auto const viewport_half_width = viewport.width() / 2.0f; - auto const viewport_half_height = viewport.height() / 2.0f; - auto const viewport_center_x = viewport.x() + viewport_half_width; - auto const viewport_center_y = viewport.y() + viewport_half_height; - auto const depth_half_range = (m_options.depth_max - m_options.depth_min) / 2; - auto const depth_halfway = (m_options.depth_min + m_options.depth_max) / 2; + // Clip triangles for (auto& triangle : m_triangle_list) { - // Transform vertices into eye coordinates using the model-view transform - triangle.vertices[0].eye_coordinates = model_view_transform * triangle.vertices[0].position; - triangle.vertices[1].eye_coordinates = model_view_transform * triangle.vertices[1].position; - triangle.vertices[2].eye_coordinates = model_view_transform * triangle.vertices[2].position; - - // Transform normals before use in lighting - triangle.vertices[0].normal = normal_transform * triangle.vertices[0].normal; - triangle.vertices[1].normal = normal_transform * triangle.vertices[1].normal; - triangle.vertices[2].normal = normal_transform * triangle.vertices[2].normal; - if (m_options.normalization_enabled) { - triangle.vertices[0].normal.normalize(); - triangle.vertices[1].normal.normalize(); - triangle.vertices[2].normal.normalize(); - } - - // Calculate per-vertex lighting - if (m_options.lighting_enabled) { - auto const& material = m_materials.at(0); - for (auto& vertex : triangle.vertices) { - auto ambient = material.ambient; - auto diffuse = material.diffuse; - auto emissive = material.emissive; - auto specular = material.specular; - - if (m_options.color_material_enabled - && (m_options.color_material_face == GPU::ColorMaterialFace::Front || 
m_options.color_material_face == GPU::ColorMaterialFace::FrontAndBack)) { - switch (m_options.color_material_mode) { - case GPU::ColorMaterialMode::Ambient: - ambient = vertex.color; - break; - case GPU::ColorMaterialMode::AmbientAndDiffuse: - ambient = vertex.color; - diffuse = vertex.color; - break; - case GPU::ColorMaterialMode::Diffuse: - diffuse = vertex.color; - break; - case GPU::ColorMaterialMode::Emissive: - emissive = vertex.color; - break; - case GPU::ColorMaterialMode::Specular: - specular = vertex.color; - break; - } - } - - FloatVector4 result_color = emissive + (ambient * m_lighting_model.scene_ambient_color); - - for (auto const& light : m_lights) { - if (!light.is_enabled) - continue; - - // We need to save the length here because the attenuation factor requires a non-normalized vector! - auto sgi_arrow_operator = [](FloatVector4 const& p1, FloatVector4 const& p2, float& output_length) { - FloatVector3 light_vector; - if ((p1.w() != 0.f) && (p2.w() == 0.f)) - light_vector = p2.xyz(); - else if ((p1.w() == 0.f) && (p2.w() != 0.f)) - light_vector = -p1.xyz(); - else - light_vector = p2.xyz() - p1.xyz(); - - output_length = light_vector.length(); - if (output_length == 0.f) - return light_vector; - return light_vector / output_length; - }; - - auto sgi_dot_operator = [](FloatVector3 const& d1, FloatVector3 const& d2) { - return AK::max(d1.dot(d2), 0.0f); - }; - - float vertex_to_light_length = 0.f; - FloatVector3 vertex_to_light = sgi_arrow_operator(vertex.eye_coordinates, light.position, vertex_to_light_length); - - // Light attenuation value. 
- float light_attenuation_factor = 1.0f; - if (light.position.w() != 0.0f) - light_attenuation_factor = 1.0f / (light.constant_attenuation + (light.linear_attenuation * vertex_to_light_length) + (light.quadratic_attenuation * vertex_to_light_length * vertex_to_light_length)); - - // Spotlight factor - float spotlight_factor = 1.0f; - if (light.spotlight_cutoff_angle != 180.0f) { - auto const vertex_to_light_dot_spotlight_direction = sgi_dot_operator(vertex_to_light, light.spotlight_direction.normalized()); - auto const cos_spotlight_cutoff = AK::cos(light.spotlight_cutoff_angle * AK::Pi / 180.f); - - if (vertex_to_light_dot_spotlight_direction >= cos_spotlight_cutoff) - spotlight_factor = AK::pow(vertex_to_light_dot_spotlight_direction, light.spotlight_exponent); - else - spotlight_factor = 0.0f; - } - - // FIXME: The spec allows for splitting the colors calculated here into multiple different colors (primary/secondary color). Investigate what this means. - (void)m_lighting_model.color_control; - - // FIXME: Two sided lighting should be implemented eventually (I believe this is where the normals are -ve and then lighting is calculated with the BACK material) - (void)m_lighting_model.two_sided_lighting; - - // Ambient - auto const ambient_component = ambient * light.ambient_intensity; - - // Diffuse - auto const normal_dot_vertex_to_light = sgi_dot_operator(vertex.normal, vertex_to_light); - auto const diffuse_component = diffuse * light.diffuse_intensity * normal_dot_vertex_to_light; - - // Specular - FloatVector4 specular_component = { 0.0f, 0.0f, 0.0f, 0.0f }; - if (normal_dot_vertex_to_light > 0.0f) { - FloatVector3 half_vector_normalized; - if (!m_lighting_model.viewer_at_infinity) { - half_vector_normalized = vertex_to_light + FloatVector3(0.0f, 0.0f, 1.0f); - } else { - auto const vertex_to_eye_point = sgi_arrow_operator(vertex.eye_coordinates, { 0.f, 0.f, 0.f, 1.f }, vertex_to_light_length); - half_vector_normalized = vertex_to_light + vertex_to_eye_point; - 
} - half_vector_normalized.normalize(); - - auto const normal_dot_half_vector = sgi_dot_operator(vertex.normal, half_vector_normalized); - auto const specular_coefficient = AK::pow(normal_dot_half_vector, material.shininess); - specular_component = specular * light.specular_intensity * specular_coefficient; - } - - auto color = ambient_component + diffuse_component + specular_component; - color = color * light_attenuation_factor * spotlight_factor; - result_color += color; - } - - vertex.color = result_color; - vertex.color.set_w(diffuse.w()); // OpenGL 1.5 spec, page 59: "The A produced by lighting is the alpha value associated with diffuse color material" - vertex.color.clamp(0.0f, 1.0f); - } - } - - // Transform eye coordinates into clip coordinates using the projection transform - triangle.vertices[0].clip_coordinates = projection_transform * triangle.vertices[0].eye_coordinates; - triangle.vertices[1].clip_coordinates = projection_transform * triangle.vertices[1].eye_coordinates; - triangle.vertices[2].clip_coordinates = projection_transform * triangle.vertices[2].eye_coordinates; - - // At this point, we're in clip space - // Here's where we do the clipping. This is a really crude implementation of the - // https://learnopengl.com/Getting-started/Coordinate-Systems - // "Note that if only a part of a primitive e.g. a triangle is outside the clipping volume OpenGL - // will reconstruct the triangle as one or more triangles to fit inside the clipping range. 
" - m_clipped_vertices.clear_with_capacity(); m_clipped_vertices.append(triangle.vertices[0]); m_clipped_vertices.append(triangle.vertices[1]); @@ -886,24 +1145,8 @@ void Device::draw_primitives(GPU::PrimitiveType primitive_type, FloatMatrix4x4 c if (m_clipped_vertices.size() < 3) continue; - for (auto& vec : m_clipped_vertices) { - // To normalized device coordinates (NDC) - auto const one_over_w = 1 / vec.clip_coordinates.w(); - auto const ndc_coordinates = FloatVector4 { - vec.clip_coordinates.x() * one_over_w, - vec.clip_coordinates.y() * one_over_w, - vec.clip_coordinates.z() * one_over_w, - one_over_w, - }; - - // To window coordinates - vec.window_coordinates = { - viewport_center_x + ndc_coordinates.x() * viewport_half_width, - viewport_center_y + ndc_coordinates.y() * viewport_half_height, - depth_halfway + ndc_coordinates.z() * depth_half_range, - ndc_coordinates.w(), - }; - } + for (auto& vertex : m_clipped_vertices) + calculate_vertex_window_coordinates(vertex); Triangle tri; tri.vertices[0] = m_clipped_vertices[0]; @@ -914,57 +1157,8 @@ void Device::draw_primitives(GPU::PrimitiveType primitive_type, FloatMatrix4x4 c } } - // Generate texture coordinates if at least one coordinate is enabled - bool texture_coordinate_generation_enabled = false; - for (auto const coordinates_enabled : m_options.texcoord_generation_enabled_coordinates) { - if (coordinates_enabled != GPU::TexCoordGenerationCoordinate::None) { - texture_coordinate_generation_enabled = true; - break; - } - } - - for (auto& triangle : m_processed_triangles) { - triangle.subpixel_coordinates[0] = (triangle.vertices[0].window_coordinates.xy() * subpixel_factor).to_rounded(); - triangle.subpixel_coordinates[1] = (triangle.vertices[1].window_coordinates.xy() * subpixel_factor).to_rounded(); - triangle.subpixel_coordinates[2] = (triangle.vertices[2].window_coordinates.xy() * subpixel_factor).to_rounded(); - - auto triangle_area = edge_function(triangle.subpixel_coordinates[0], 
triangle.subpixel_coordinates[1], triangle.subpixel_coordinates[2]); - if (triangle_area == 0) - continue; - - if (m_options.enable_culling) { - bool is_front = (m_options.front_face == GPU::WindingOrder::CounterClockwise ? triangle_area > 0 : triangle_area < 0); - - if (!is_front && m_options.cull_back) - continue; - - if (is_front && m_options.cull_front) - continue; - } - - // Force counter-clockwise ordering of vertices - if (triangle_area < 0) { - swap(triangle.vertices[0], triangle.vertices[1]); - swap(triangle.subpixel_coordinates[0], triangle.subpixel_coordinates[1]); - triangle_area *= -1; - } - triangle.area = triangle_area; - - if (texture_coordinate_generation_enabled) { - generate_texture_coordinates(triangle.vertices[0], m_options); - generate_texture_coordinates(triangle.vertices[1], m_options); - generate_texture_coordinates(triangle.vertices[2], m_options); - } - - // Apply texture transformation - for (size_t i = 0; i < GPU::NUM_SAMPLERS; ++i) { - triangle.vertices[0].tex_coords[i] = texture_transform * triangle.vertices[0].tex_coords[i]; - triangle.vertices[1].tex_coords[i] = texture_transform * triangle.vertices[1].tex_coords[i]; - triangle.vertices[2].tex_coords[i] = texture_transform * triangle.vertices[2].tex_coords[i]; - } - + for (auto& triangle : m_processed_triangles) rasterize_triangle(triangle); - } } ALWAYS_INLINE void Device::shade_fragments(PixelQuad& quad) @@ -1033,6 +1227,9 @@ ALWAYS_INLINE void Device::shade_fragments(PixelQuad& quad) quad.out_color.set_y(mix(fog_color.y(), quad.out_color.y(), factor)); quad.out_color.set_z(mix(fog_color.z(), quad.out_color.z(), factor)); } + + // Multiply coverage with the fragment's alpha to obtain the final alpha value + quad.out_color.set_w(quad.out_color.w() * quad.coverage); } ALWAYS_INLINE bool Device::test_alpha(PixelQuad& quad) diff --git a/Userland/Libraries/LibSoftGPU/Device.h b/Userland/Libraries/LibSoftGPU/Device.h index 4e82e958e46..715170db5f5 100644 --- 
a/Userland/Libraries/LibSoftGPU/Device.h +++ b/Userland/Libraries/LibSoftGPU/Device.h @@ -47,7 +47,7 @@ public: virtual GPU::DeviceInfo info() const override; - virtual void draw_primitives(GPU::PrimitiveType, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform, FloatMatrix4x4 const& texture_transform, Vector const& vertices, Vector const& enabled_texture_units) override; + virtual void draw_primitives(GPU::PrimitiveType, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform, FloatMatrix4x4 const& texture_transform, Vector& vertices, Vector const& enabled_texture_units) override; virtual void resize(Gfx::IntSize const& min_size) override; virtual void clear_color(FloatVector4 const&) override; virtual void clear_depth(GPU::DepthType) override; @@ -74,10 +74,22 @@ public: virtual void set_raster_position(FloatVector4 const& position, FloatMatrix4x4 const& model_view_transform, FloatMatrix4x4 const& projection_transform) override; private: + void calculate_vertex_lighting(GPU::Vertex& vertex) const; void draw_statistics_overlay(Gfx::Bitmap&); Gfx::IntRect get_rasterization_rect_of_size(Gfx::IntSize size) const; - void rasterize_triangle(Triangle const&); + template + void rasterize(Gfx::IntRect& render_bounds, CB1 set_coverage_mask, CB2 set_quad_depth, CB3 set_quad_attributes); + + void rasterize_line_aliased(GPU::Vertex&, GPU::Vertex&); + void rasterize_line_antialiased(GPU::Vertex&, GPU::Vertex&); + void rasterize_line(GPU::Vertex&, GPU::Vertex&); + + void rasterize_point_aliased(GPU::Vertex&); + void rasterize_point_antialiased(GPU::Vertex&); + void rasterize_point(GPU::Vertex&); + + void rasterize_triangle(Triangle&); void setup_blend_factors(); void shade_fragments(PixelQuad&); bool test_alpha(PixelQuad&); diff --git a/Userland/Libraries/LibSoftGPU/PixelQuad.h b/Userland/Libraries/LibSoftGPU/PixelQuad.h index 399130674b5..af3ace084f9 100644 --- a/Userland/Libraries/LibSoftGPU/PixelQuad.h +++ 
b/Userland/Libraries/LibSoftGPU/PixelQuad.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, Stephan Unverwerth + * Copyright (c) 2022, Jelle Raaijmakers * * SPDX-License-Identifier: BSD-2-Clause */ @@ -7,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -14,15 +16,20 @@ namespace SoftGPU { +using AK::SIMD::expand4; +using AK::SIMD::f32x4; +using AK::SIMD::i32x4; + struct PixelQuad final { - Vector2 screen_coordinates; - Vector3 barycentrics; - AK::SIMD::f32x4 depth; - Vector4 vertex_color; - Array, GPU::NUM_SAMPLERS> texture_coordinates; - Vector4 out_color; - AK::SIMD::f32x4 fog_depth; - AK::SIMD::i32x4 mask; + Vector2 screen_coordinates; + Vector3 barycentrics; + f32x4 depth; + Vector4 vertex_color; + Array, GPU::NUM_SAMPLERS> texture_coordinates; + Vector4 out_color; + f32x4 fog_depth; + i32x4 mask; + f32x4 coverage { expand4(1.f) }; }; } diff --git a/Userland/Libraries/LibSoftGPU/SIMD.h b/Userland/Libraries/LibSoftGPU/SIMD.h index e24c198c372..59811666981 100644 --- a/Userland/Libraries/LibSoftGPU/SIMD.h +++ b/Userland/Libraries/LibSoftGPU/SIMD.h @@ -103,6 +103,11 @@ ALWAYS_INLINE static Vector2 ddy(Vector2 const }; } +ALWAYS_INLINE static AK::SIMD::f32x4 length(Vector2 const& v) +{ + return AK::SIMD::sqrt(v.dot(v)); +} + // Calculates a quadratic approximation of log2, exploiting the fact that IEEE754 floats are represented as mantissa * 2^exponent. 
// See https://stackoverflow.com/questions/9411823/fast-log2float-x-implementation-c ALWAYS_INLINE static AK::SIMD::f32x4 log2_approximate(AK::SIMD::f32x4 v) @@ -124,4 +129,12 @@ ALWAYS_INLINE static AK::SIMD::f32x4 log2_approximate(AK::SIMD::f32x4 v) return log; } +ALWAYS_INLINE static Vector2 to_vec2_f32x4(Vector2 const& v) +{ + return { + AK::SIMD::to_f32x4(v.x()), + AK::SIMD::to_f32x4(v.y()), + }; +} + } diff --git a/Userland/Libraries/LibSoftGPU/Triangle.h b/Userland/Libraries/LibSoftGPU/Triangle.h index b5903ffe907..ff005b35f11 100644 --- a/Userland/Libraries/LibSoftGPU/Triangle.h +++ b/Userland/Libraries/LibSoftGPU/Triangle.h @@ -15,8 +15,6 @@ namespace SoftGPU { struct Triangle { GPU::Vertex vertices[3]; - IntVector2 subpixel_coordinates[3]; - i32 area; }; }