Compare commits

..

6 Commits

Author SHA1 Message Date
Caio Oliveira
a443a815a3 updates
Signed-off-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
2025-12-25 23:44:07 -03:00
lizzie
82338a464c fx 2025-12-25 23:34:05 -03:00
lizzie
65054c33e9 [vk] remove redundant "enable" of sample shading 2025-12-25 23:34:05 -03:00
lizzie
370997f42e [externals/ffmpeg] remove --disable-postproc causing issues in OpenOrbis toolchain (#3203)
why was disable-postproc added? either way this fixes build errors not only on PS4 but also on Haiku i think

Signed-off-by: lizzie lizzie@eden-emu.dev

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3203
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
2025-12-26 02:36:08 +01:00
crueter
5213cc5689 Revert "[vk] Correct polygon draw topology mapping for line and point modes (#2834)" (#3158)
This reverts commit 6ba25b6cc0.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3158
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: Lizzie <lizzie@eden-emu.dev>
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
2025-12-26 02:33:53 +01:00
lizzie
bc9af86269 [externals] update Vulkan headers from 1.4.328.1 -> 1.4.335.0 (#3202)
notably adds access to `VK_KHR_maintenance10` :)
I'm not sure if we want to update vk as regularly as with other deps as the only worthwhile change I saw was the addition of maintainance10
Signed-off-by: lizzie lizzie@eden-emu.dev

Co-authored-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3202
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Reviewed-by: crueter <crueter@eden-emu.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
2025-12-25 20:48:28 +01:00
51 changed files with 2815 additions and 662 deletions

View File

@@ -96,8 +96,8 @@
"package": "VVL",
"repo": "KhronosGroup/Vulkan-ValidationLayers",
"tag": "vulkan-sdk-%VERSION%",
"git_version": "1.4.328.1",
"git_version": "1.4.335.0",
"artifact": "android-binaries-%VERSION%.zip",
"hash": "5ec895a453cb7c2f156830b9766953a0c2bd44dea99e6a3dac4160305041ccd3e87534b4ce0bd102392178d2a8eca48411856298f9395e60117cdfe89f72137e"
"hash": "48167c4a17736301bd08f9290f41830443e1f18cce8ad867fc6f289b49e18b40e93c9850b377951af82f51b5b6d7313aa6a884fc5df79f5ce3df82696c1c1244"
}
}

View File

@@ -13,4 +13,3 @@ This contains documentation created by developers. This contains build instructi
- **[The NVIDIA SM86 (Maxwell) GPU](./NvidiaGpu.md)**
- **[User Handbook](./user)**
- **[Release Policy](./ReleasePolicy.md)**
- **[Dynarmic](./dynarmic)**

View File

@@ -119,10 +119,10 @@
"package": "VulkanUtilityLibraries",
"repo": "scripts/VulkanUtilityHeaders",
"tag": "%VERSION%",
"git_version": "1.4.328",
"git_version": "1.4.335",
"artifact": "VulkanUtilityHeaders.tar.zst",
"git_host": "git.crueter.xyz",
"hash": "9922217b39faf73cd4fc1510f2fdba14a49aa5c0d77f9ee24ee0512cef16b234d0cabc83c1fec861fa5df1d43e7f086ca9b6501753899119f39c5ca530cb0dae"
"hash": "16dac0e6586702580c4279e4cd37ffe3cf909c93eb31b5069da7af36436d47b270a9cbaac953bb66c22ed12ed67ffa096688599267f307dfb62be1bc09f79833"
},
"spirv-tools": {
"package": "SPIRV-Tools",

View File

@@ -217,7 +217,6 @@ else()
--disable-ffmpeg
--disable-ffprobe
--disable-network
--disable-postproc
--disable-swresample
--enable-decoder=h264
--enable-decoder=vp8

View File

@@ -31,7 +31,6 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
RENDERER_VERTEX_INPUT_DYNAMIC_STATE("vertex_input_dynamic_state"),
RENDERER_PROVOKING_VERTEX("provoking_vertex"),
RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"),
RENDERER_SAMPLE_SHADING("sample_shading"),
PICTURE_IN_PICTURE("picture_in_picture"),
USE_CUSTOM_RTC("custom_rtc_enabled"),
BLACK_BACKGROUNDS("black_backgrounds"),

View File

@@ -160,13 +160,6 @@ abstract class SettingsItem(
descriptionId = R.string.descriptor_indexing_description
)
)
put(
SwitchSetting(
BooleanSetting.RENDERER_SAMPLE_SHADING,
titleId = R.string.sample_shading,
descriptionId = R.string.sample_shading_description
)
)
put(
SliderSetting(
IntSetting.RENDERER_SAMPLE_SHADING_FRACTION,

View File

@@ -453,12 +453,11 @@ class SettingsFragmentPresenter(
private fun addEdenVeilSettings(sl: ArrayList<SettingsItem>) {
sl.apply {
add(HeaderSetting(R.string.veil_extensions))
add(IntSetting.RENDERER_SAMPLE_SHADING_FRACTION.key)
add(ByteSetting.RENDERER_DYNA_STATE.key)
add(BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE.key)
add(BooleanSetting.RENDERER_PROVOKING_VERTEX.key)
add(BooleanSetting.RENDERER_DESCRIPTOR_INDEXING.key)
add(BooleanSetting.RENDERER_SAMPLE_SHADING.key)
add(IntSetting.RENDERER_SAMPLE_SHADING_FRACTION.key)
add(HeaderSetting(R.string.veil_renderer))
add(IntSetting.DMA_ACCURACY.key)

View File

@@ -96,8 +96,6 @@
<string name="provoking_vertex_description">يحسن الإضاءة ومعالجة الرؤوس في بعض الألعاب. مدعوم فقط على وحدات معالجة الرسومات Vulkan 1.0+.</string>
<string name="descriptor_indexing">فهرسة الوصف</string>
<string name="descriptor_indexing_description">يحسن معالجة النسيج والمخزن المؤقت، بالإضافة إلى طبقة الترجمة Maxwell. مدعوم من بعض وحدات معالجة الرسومات Vulkan 1.1 وجميع وحدات معالجة الرسومات Vulkan 1.2+.</string>
<string name="sample_shading">تظليل العينة</string>
<string name="sample_shading_description">يسمح لتظلل الأجزاء بتنفيذ كل عينة في جزء متعدد العينات بدلاً من مرة واحدة لكل جزء. يحسن جودة الرسومات على حساب بعض الأداء. لا تدعم هذه الإضافة سوى أجهزة Vulkan 1.1+ فقط.</string>
<string name="sample_shading_fraction">نسبة التظليل النموذجية</string>
<string name="sample_shading_fraction_description">كثافة تمرير تظليل العينة. تؤدي القيم الأعلى إلى تحسين الجودة بشكل أكبر، ولكنها تقلل أيضًا من الأداء إلى حد كبير.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">يحسن الإضاءة ومعالجة الرؤوس في بعض الألعاب. مدعوم فقط على وحدات معالجة الرسومات التي تدعم فولكان 1.0+.</string>
<string name="descriptor_indexing">فهرسة الواصفات</string>
<string name="descriptor_indexing_description">يحسن معالجة القوام والمخازن المؤقتة، بالإضافة إلى طبقة ترجمة ماكسويل. مدعوم من قبل بعض وحدات معالجة الرسومات التي تدعم فولكان 1.1 وجميع وحدات معالجة الرسومات التي تدعم فولكان 1.2+.</string>
<string name="sample_shading">سێبەندی نمونە</string>
<string name="sample_shading_description">ڕێگە بە شێدەری پارچە دەدات کە بۆ هەر نمونەیەک لە پارچەی فرە نمونەیدا جێبەجێ بکات لە جیاتی جێبەجێکردنی بۆ هەر پارچەیەک. جۆرایی گرافیک باشتر دەکات بە بەهای هەندێک لە کارایی. تەنها ئامێرەکانی Vulkan 1.1+ پشتگیری ئەم درێژکراوە دەکەن.</string>
<string name="sample_shading_fraction">پێکهاتەی سێبەرکردنی نموونە</string>
<string name="sample_shading_fraction_description">چڕی تێپەڕاندنی سێبەرکردنی نموونە. بەهای زیاتر کوالێتی باشتر دەکات بەڵام کارایی زیاتر کەم دەکاتەوە.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">Zlepšuje osvětlení a zpracování vertexů v některých hrách. Podporováno pouze na GPU s Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexování deskriptorů</string>
<string name="descriptor_indexing_description">Zlepšuje zpracování textur a bufferů, stejně jako Maxwell překladovou vrstvu. Podporováno některými GPU s Vulkan 1.1 a všemi GPU s Vulkan 1.2+.</string>
<string name="sample_shading">Vzorkovací stínování</string>
<string name="sample_shading_description">Umožňuje fragment shaderu provádět výpočty pro každý vzorek ve fragmentu s více vzorky namísto jednou pro fragment. Zlepšuje kvalitu grafiky na úkor výkonu. Tuto funkci podporují pouze zařízení s Vulkan 1.1+.</string>
<string name="sample_shading_fraction">Podíl stínování vzorku</string>
<string name="sample_shading_fraction_description">Intenzita průchodu stínování vzorku. Vyšší hodnoty zlepšují kvalitu, ale také výrazněji snižují výkon.</string>

View File

@@ -78,8 +78,6 @@
<string name="provoking_vertex_description">Verbessert die Beleuchtung und die Vertex-Verarbeitung in einigen Spielen. Wird nur von GPUs mit Vulkan 1.0+ unterstützt.</string>
<string name="descriptor_indexing">Deskriptor-Indizierung</string>
<string name="descriptor_indexing_description">Verbessert die Textur- und Puffer-Verarbeitung sowie die Maxwell-Übersetzungsschicht. Wird von einigen Vulkan 1.1-GPUs und allen Vulkan 1.2+-GPUs unterstützt.</string>
<string name="sample_shading">Sample Shading</string>
<string name="sample_shading_description">Ermöglicht dem Fragment-Shader, pro Sample in einem mehrfach gesampleten Fragment ausgeführt zu werden, anstatt einmal pro Fragment. Verbessert die Grafikqualität auf Kosten der Leistung. Nur Vulkan 1.1+-Geräte unterstützen diese Erweiterung.</string>
<string name="sample_shading_fraction">Sample-Shading-Anteil</string>
<string name="sample_shading_fraction_description">Die Intensität des Sample-Shading-Durchgangs. Höhere Werte verbessern die Qualität stärker, beeinträchtigen aber auch die Leistung stärker.</string>

View File

@@ -91,8 +91,6 @@
<string name="provoking_vertex_description">Mejora la iluminación y el manejo de vértices en ciertos juegos. Solo es compatible con las GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexación del descriptor</string>
<string name="descriptor_indexing_description">Mejora la textura y el manejo del búfer, así como la capa de traducción Maxwell. Compatible con algunas GPU Vulkan 1.1 y todas las GPU Vulkan 1.2+.</string>
<string name="sample_shading">Sombreado de muestra</string>
<string name="sample_shading_description">Permite que el sombreador de fragmentos se ejecute por muestra en un fragmento de múltiples muestras en lugar de una vez por fragmento. Mejora la calidad gráfica a costa de algo de rendimiento. Solo los dispositivos Vulkan 1.1+ admiten esta extensión.</string>
<string name="sample_shading_fraction">Fracción de sombreado de muestra</string>
<string name="sample_shading_fraction_description">La intensidad del paso de sombreado de la muestra. Los valores más altos mejoran más la calidad, pero también reducen el rendimiento en mayor medida.</string>

View File

@@ -104,8 +104,6 @@
<string name="fast_cpu_time_description">از Boost (1700MHz) برای کار با بالاترین سرعت کلاک بومی سوئیچ یا Fast (2000MHz) برای کار با دو برابر سرعت استفاده کنید.</string>
<string name="memory_layout">چیدمان حافظه</string>
<string name="memory_layout_description">(آزمایشی) چیدمان حافظه شبیه‌سازی شده را تغییر می‌دهد. این تنظیم عملکرد را افزایش نمی‌دهد، اما ممکن است به بازی‌هایی که از رزولوشن بالا با استفاده از مادها استفاده می‌کنند کمک کند. در تلفن‌های با 8 گیگابایت رم یا کمتر استفاده نشود.</string>
<string name="sample_shading">سایه‌زنی نمونه</string>
<string name="sample_shading_description">اجازه می‌دهد شیدر قطعه در هر نمونه از یک قطعه چندنمونه‌ای اجرا شود به جای یک بار برای هر قطعه. کیفیت گرافیک را به بهای کاهش عملکرد بهبود می‌بخشد. فقط دستگاه‌های Vulkan 1.1+ از این افزونه پشتیبانی می‌کنند.</string>
<string name="sample_shading_fraction">کسر سایه‌زنی نمونه</string>
<string name="sample_shading_fraction_description">شدت مرحله سایه‌زنی نمونه. مقادیر بالاتر کیفیت را بیشتر بهبود می‌بخشد اما عملکرد را نیز به میزان بیشتری کاهش می‌دهد.</string>
<string name="custom_cpu_ticks">تیک‌های CPU سفارشی</string>

View File

@@ -93,8 +93,6 @@
<string name="provoking_vertex_description">Améliore l`éclairage et la gestion des vertex dans certains jeux. Pris en charge uniquement par les GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexation des descripteurs</string>
<string name="descriptor_indexing_description">Améliore la gestion des textures et des tampons, ainsi que la couche de traduction Maxwell. Pris en charge par certains GPU Vulkan 1.1 et tous les GPU Vulkan 1.2+.</string>
<string name="sample_shading">Échantillonnage de shading</string>
<string name="sample_shading_description">Permet au fragment shader de s\'exécuter par échantillon dans un fragment multi-échantillonné au lieu d\'une fois par fragment. Améliore la qualité graphique au détriment des performances. Seuls les appareils Vulkan 1.1+ prennent en charge cette extension.</string>
<string name="sample_shading_fraction">Fraction d\'ombrage d\'échantillon</string>
<string name="sample_shading_fraction_description">L\'intensité de la passe d\'ombrage d\'échantillon. Des valeurs plus élevées améliorent davantage la qualité mais réduisent aussi plus fortement les performances.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">משפר תאורה וטיפול בקודקודים במשחקים מסוימים. נתמך רק בכרטיסי מסך עם Vulkan 1.0+.</string>
<string name="descriptor_indexing">אינדוקס תיאורים</string>
<string name="descriptor_indexing_description">משפר טיפול במרקמים ומאגרים, כמו גם בשכבת התרגום של Maxwell. נתמך בחלק מכרטיסי ה-Vulkan 1.1 ובכל כרטיסי ה-Vulkan 1.2+.</string>
<string name="sample_shading">דגימת צל</string>
<string name="sample_shading_description">מאפשר לשברי הצללה לרוץ לכל דגימה בקטע רב-דגימות במקום פעם אחת לקטע. משפר את איכות הגרפיקה במחיר של ביצועים. רק מכשירי Vulkan 1.1+ תומכים בהרחבה זו.</string>
<string name="sample_shading_fraction">שבר הצללה לדוגמה</string>
<string name="sample_shading_fraction_description">עוצמת מעבר ההצללה לדוגמה. ערכים גבוהים יותר משפרים את האיכות יותר אך גם מפחיתים את הביצועים במידה רבה יותר.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">Javítja a világítást és a csúcskezelést bizonyos játékokban. Csak Vulkan 1.0+ GPU-kon támogatott.</string>
<string name="descriptor_indexing">Deskriptor Indexelés</string>
<string name="descriptor_indexing_description">Javítja a textúrák és pufferek kezelését, valamint a Maxwell fordítási réteget. Néhány Vulkan 1.1 GPU és minden Vulkan 1.2+ GPU támogatja.</string>
<string name="sample_shading">Mintavételezés árnyékolás</string>
<string name="sample_shading_description">Lehetővé teszi, hogy a fragment shader mintánként fusson egy többmintás fragmentben a fragmentenkénti futtatás helyett. Javítja a grafikai minőséget a teljesítmény rovására. Csak Vulkan 1.1+ eszközök támogatják ezt a kiterjesztést.</string>
<string name="sample_shading_fraction">Mintavételezés árnyékolási hányad</string>
<string name="sample_shading_fraction_description">A mintavételezés árnyékolási lépés intenzitása. A magasabb értékek jobb minőséget eredményeznek, de nagyobb mértékben csökkentik a teljesítményt.</string>

View File

@@ -91,8 +91,6 @@
<string name="provoking_vertex_description">Meningkatkan pencahayaan dan penanganan vertex di beberapa game. Hanya didukung di GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Pengindeks Deskriptor</string>
<string name="descriptor_indexing_description">Meningkatkan penanganan tekstur dan buffer, serta lapisan terjemahan Maxwell. Didukung oleh beberapa GPU Vulkan 1.1 dan semua GPU Vulkan 1.2+.</string>
<string name="sample_shading">Pencahayaan Sampel</string>
<string name="sample_shading_description">Memungkinkan fragment shader dieksekusi per sampel dalam fragmen multisampel alih-alih sekali per fragmen. Meningkatkan kualitas grafis dengan mengorbankan kinerja. Hanya perangkat Vulkan 1.1+ yang mendukung ekstensi ini.</string>
<string name="sample_shading_fraction">Fraksi Pencahayaan Sampel</string>
<string name="sample_shading_fraction_description">Intensitas proses pencahayaan sampel. Nilai lebih tinggi meningkatkan kualitas lebih baik tetapi juga mengurangi performa lebih besar.</string>

View File

@@ -91,8 +91,6 @@
<string name="provoking_vertex_description">Migliora illuminazione e gestione dei vertici in alcuni giochi. Supportato solo su GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indicizzazione descrittori</string>
<string name="descriptor_indexing_description">Migliora la gestione di texture e buffer, nonché il livello di traduzione Maxwell. Supportato da alcune GPU Vulkan 1.1 e tutte le GPU Vulkan 1.2+.</string>
<string name="sample_shading">Shading campione</string>
<string name="sample_shading_description">Permette al fragment shader di eseguire per campione in un frammento multi-campione invece che una volta per frammento. Migliora la qualità grafica a scapito delle prestazioni. Solo i dispositivi Vulkan 1.1+ supportano questa estensione.</string>
<string name="sample_shading_fraction">Frazione di ombreggiatura campione</string>
<string name="sample_shading_fraction_description">L\'intensità della passata di ombreggiatura campione. Valori più alti migliorano la qualità ma riducono maggiormente le prestazioni.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">特定のゲームで照明と頂点処理を改善します。Vulkan 1.0+ GPUでのみサポートされています。</string>
<string name="descriptor_indexing">ディスクリプタインデキシング</string>
<string name="descriptor_indexing_description">テクスチャとバッファの処理、およびMaxwell翻訳レイヤーを改善します。一部のVulkan 1.1 GPUとすべてのVulkan 1.2+ GPUでサポートされています。</string>
<string name="sample_shading">サンプルシェーディング</string>
<string name="sample_shading_description">マルチサンプルフラグメントでフラグメントシェーダーをフラグメントごとではなくサンプルごとに実行できるようにします。パフォーマンスを犠牲にしてグラフィック品質を向上させます。Vulkan 1.1+デバイスのみがこの拡張機能をサポートしています。</string>
<string name="sample_shading_fraction">サンプルシェーディング率</string>
<string name="sample_shading_fraction_description">サンプルシェーディング処理の強度。高い値ほど品質は向上しますが、パフォーマンスも大きく低下します。</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">일부 게임에서 조명과 버텍스 처리를 개선합니다. Vulkan 1.0+ GPU에서만 지원됩니다.</string>
<string name="descriptor_indexing">디스크립터 인덱싱</string>
<string name="descriptor_indexing_description">텍스처 및 버퍼 처리와 Maxwell 변환 레이어를 개선합니다. 일부 Vulkan 1.1 GPU 및 모든 Vulkan 1.2+ GPU에서 지원됩니다.</string>
<string name="sample_shading">샘플 쉐이딩</string>
<string name="sample_shading_description">멀티샘플 프래그먼트에서 프래그먼트 쉐이더가 프래그먼트당 한 번이 아니라 샘플당 실행되도록 합니다. 성능을 희생하여 그래픽 품질을 향상시킵니다. Vulkan 1.1+ 장치만 이 확장을 지원합니다.</string>
<string name="sample_shading_fraction">샘플 쉐이딩 비율</string>
<string name="sample_shading_fraction_description">샘플 쉐이딩 패스의 강도. 값이 높을수록 품질이 더 향상되지만 성능도 더 크게 저하됩니다.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">Forbedrer belysning og vertexhåndtering i enkelte spill. Støttes kun på Vulkan 1.0+ GPU-er.</string>
<string name="descriptor_indexing">Beskrivelsesindeksering</string>
<string name="descriptor_indexing_description">Forbedrer tekstur- og bufferhåndtering, samt Maxwell-oversettelseslaget. Støttes av noen Vulkan 1.1 GPU-er og alle Vulkan 1.2+ GPU-er.</string>
<string name="sample_shading">Prøvegjengivelse</string>
<string name="sample_shading_description">Lar fragment-shaderen kjøres per prøve i et flerprøvefragment i stedet for en gang per fragment. Forbedrer grafikkvaliteten på bekostning av ytelse. Bare Vulkan 1.1+-enheter støtter denne utvidelsen.</string>
<string name="sample_shading_fraction">Prøveskyggebrøk</string>
<string name="sample_shading_fraction_description">Intensiteten til prøveskyggepasseringen. Høyere verdier forbedrer kvaliteten mer, men reduserer også ytelsen i større grad.</string>

View File

@@ -96,8 +96,6 @@
<string name="provoking_vertex_description">Poprawia oświetlenie i obsługę wierzchołków w niektórych grach. Obsługiwane tylko przez GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indeksowanie deskryptorów</string>
<string name="descriptor_indexing_description">Poprawia obsługę tekstur i buforów oraz warstwę tłumaczenia Maxwell. Obsługiwane przez niektóre GPU Vulkan 1.1 i wszystkie GPU Vulkan 1.2+.</string>
<string name="sample_shading">Cieniowanie próbek</string>
<string name="sample_shading_description">Pozwala shaderowi fragmentów wykonywać się na próbkę w fragmencie wielopróbkowym zamiast raz na fragment. Poprawia jakość grafiki kosztem wydajności. Tylko urządzenia Vulkan 1.1+ obsługują to rozszerzenie.</string>
<string name="sample_shading_fraction">Ułamek cieniowania próbki</string>
<string name="sample_shading_fraction_description">Intensywność przebiegu cieniowania próbki. Wyższe wartości poprawiają jakość, ale także w większym stopniu zmniejszają wydajność.</string>

View File

@@ -91,8 +91,6 @@
<string name="provoking_vertex_description">Vértice Provocante: Melhora a iluminação e o processamento de vértices em certos jogos. Suportado apenas em GPUs com Vulkan 1.0 ou superior.</string>
<string name="descriptor_indexing">Descriptor Indexing</string>
<string name="descriptor_indexing_description">Indexação de Descritores: Melhora o processamento de texturas e buffers, assim como a camada de tradução Maxwell. Suportado por algumas GPUs Vulkan 1.1 e todas as GPUs Vulkan 1.2 ou superiores.</string>
<string name="sample_shading">Sample Shading</string>
<string name="sample_shading_description">Amostragem de Sombreamento: Permite que o shader de fragmento seja processado por cada amostra em fragmentos multiamostrados, em vez de executar uma vez por fragmento, melhorando a qualidade gráfica, porém impactando levemente o desempenho. Funciona apenas em dispositivos Vulkan 1.1 ou superiores.</string>
<string name="sample_shading_fraction">Sample Shading Fraction</string>
<string name="sample_shading_fraction_description">Fração de Sombreamento de Amostra: Define a intensidade do sample shading. Quanto maior, melhor a qualidade, mas maior o impacto no desempenho.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">Melhora a iluminação e o tratamento de vértices em certos jogos. Suportado apenas em GPUs Vulkan 1.0+.</string>
<string name="descriptor_indexing">Indexação de descritores</string>
<string name="descriptor_indexing_description">Melhora o tratamento de texturas e buffers, assim como a camada de tradução Maxwell. Suportado por algumas GPUs Vulkan 1.1 e todas Vulkan 1.2+.</string>
<string name="sample_shading">Amostragem de sombreamento</string>
<string name="sample_shading_description">Permite que o fragment shader seja executado por amostra num fragmento multi-amostrado em vez de uma vez por fragmento. Melhora a qualidade gráfica à custa de desempenho. Apenas dispositivos Vulkan 1.1+ suportam esta extensão.</string>
<string name="sample_shading_fraction">Fração de Sombreamento de Amostra</string>
<string name="sample_shading_fraction_description">A intensidade da passagem de sombreamento de amostra. Valores mais elevados melhoram a qualidade, mas também reduzem o desempenho numa maior medida.</string>

View File

@@ -96,8 +96,6 @@
<string name="provoking_vertex_description">Улучшает освещение и обработку вершин в некоторых играх. Поддерживается только ГПУ с Vulkan 1.0+.</string>
<string name="descriptor_indexing">Индексирование дескрипторов</string>
<string name="descriptor_indexing_description">Улучшает обработку текстур и буферов, а также слой перевода Maxwell. Поддерживается некоторыми ГПУ Vulkan 1.1 и всеми ГПУ Vulkan 1.2+.</string>
<string name="sample_shading">Сэмпловый шейдинг</string>
<string name="sample_shading_description">Позволяет шейдеру фрагментов выполняться на каждый сэмпл в мультисэмпловом фрагменте вместо одного раза на фрагмент. Улучшает качество графики ценой производительности. Только устройства с Vulkan 1.1+ поддерживают это расширение.</string>
<string name="sample_shading_fraction">Доля сэмплового затенения</string>
<string name="sample_shading_fraction_description">Интенсивность прохода сэмплового затенения. Более высокие значения улучшают качество, но и сильнее снижают производительность.</string>

View File

@@ -68,8 +68,6 @@
<string name="provoking_vertex_description">Побољшава осветљење и вертификат руковања у одређеним играма. Подржан само на Вулкану 1.0+ ГПУ-у.</string>
<string name="descriptor_indexing">Индексирање дескриптора</string>
<string name="descriptor_indexing_description">Побољшава текстуру и руковање међуспремника, као и преводилачки слој Маквелл. Подржани од стране неких Вулкана 1.1 ГПУ-а и сви Вулкан 1.2+ ГПУ.</string>
<string name="sample_shading">Семпловање сенчења</string>
<string name="sample_shading_description">Омогућава фрагмент шејдеру да се извршава по узорку у вишеузорачном фрагменту уместо једном по фрагменту. Побољшава квалитет графике на рачун перформанси. Само Vulkan 1.1+ уређаји подржавају ову екстензију.</string>
<string name="sample_shading_fraction">Удео сенчења узорка</string>
<string name="sample_shading_fraction_description">Интензитет проласка сенчења узорка. Веће вредности побољшавају квалитет више, али такође више смањују перформансе.</string>

View File

@@ -96,8 +96,6 @@
<string name="provoking_vertex_description">Покращує освітлення та взаємодію з вершинами у деяких іграх. Лише для ГП з підтримкою Vulkan 1.0+.</string>
<string name="descriptor_indexing">Індексація дескрипторів</string>
<string name="descriptor_indexing_description">Покращує обробку текстур та буферів, а також шар перекладу Maxwell. Підтримується деякими GPU Vulkan 1.1 та всіма GPU Vulkan 1.2+.</string>
<string name="sample_shading">Шейдинг зразків</string>
<string name="sample_shading_description">Дозволяє шейдеру фрагментів виконуватися на кожен семпл у багатосемпловому фрагменті замість одного разу на фрагмент. Покращує якість графіки за рахунок продуктивності. Лише пристрої з Vulkan 1.1+ підтримують це розширення.</string>
<string name="sample_shading_fraction">Частка затінення зразка</string>
<string name="sample_shading_fraction_description">Інтенсивність проходу затінення зразка. Вищі значення покращують якість, але й сильніше знижують продуктивність.</string>

View File

@@ -70,8 +70,6 @@
<string name="provoking_vertex_description">Cải thiện ánh sáng và xử lý đỉnh trong một số trò chơi. Chỉ được hỗ trợ trên GPU Vulkan 1.0+.</string>
<string name="descriptor_indexing">Lập chỉ mục bộ mô tả</string>
<string name="descriptor_indexing_description">Cải thiện xử lý kết cấu và bộ đệm, cũng như lớp dịch Maxwell. Được hỗ trợ bởi một số GPU Vulkan 1.1 và tất cả GPU Vulkan 1.2+.</string>
<string name="sample_shading">Tô bóng mẫu</string>
<string name="sample_shading_description">Cho phép fragment shader thực thi trên mỗi mẫu trong một fragment đa mẫu thay vì một lần mỗi fragment. Cải thiện chất lượng đồ họa với chi phí hiệu suất. Chỉ thiết bị Vulkan 1.1+ hỗ trợ tiện ích mở rộng này.</string>
<string name="sample_shading_fraction">Phần trăm tô bóng mẫu</string>
<string name="sample_shading_fraction_description">Cường độ của bước tô bóng mẫu. Giá trị cao hơn cải thiện chất lượng tốt hơn nhưng cũng giảm hiệu suất nhiều hơn.</string>

View File

@@ -93,8 +93,6 @@
<string name="provoking_vertex_description">改善某些游戏中的光照和顶点处理。仅支持Vulkan 1.0+ GPU。</string>
<string name="descriptor_indexing">描述符索引</string>
<string name="descriptor_indexing_description">改进纹理和缓冲区处理以及Maxwell转换层。部分Vulkan 1.1 GPU和所有Vulkan 1.2+ GPU支持。</string>
<string name="sample_shading">采样着色</string>
<string name="sample_shading_description">允许片段着色器在多采样片段中每个样本执行一次而不是每个片段执行一次。以提高性能为代价改善图形质量。仅Vulkan 1.1+设备支持此扩展。</string>
<string name="sample_shading_fraction">采样着色比例</string>
<string name="sample_shading_fraction_description">采样着色处理的强度。值越高,质量改善越多,但性能降低也越明显。</string>

View File

@@ -96,8 +96,6 @@
<string name="provoking_vertex_description">改善某些遊戲中的光照和頂點處理。僅支援Vulkan 1.0+ GPU。</string>
<string name="descriptor_indexing">描述符索引</string>
<string name="descriptor_indexing_description">改進紋理和緩衝區處理以及Maxwell轉換層。部分Vulkan 1.1 GPU和所有Vulkan 1.2+ GPU支援。</string>
<string name="sample_shading">取樣著色</string>
<string name="sample_shading_description">允許片段著色器在多取樣片段中每個樣本執行一次而不是每個片段執行一次。以提高效能為代價改善圖形品質。僅Vulkan 1.1+裝置支援此擴充功能。</string>
<string name="sample_shading_fraction">採樣著色比例</string>
<string name="sample_shading_fraction_description">採樣著色處理的強度。數值越高,品質改善越多,但效能降低也越明顯。</string>

View File

@@ -101,10 +101,8 @@
<string name="provoking_vertex_description">Improves lighting and vertex handling in certain games. Only supported on Vulkan 1.0+ GPUs.</string>
<string name="descriptor_indexing">Descriptor Indexing</string>
<string name="descriptor_indexing_description">Improves texture and buffer handling, as well as the Maxwell translation layer. Supported by some Vulkan 1.1 GPUs and all Vulkan 1.2+ GPUs.</string>
<string name="sample_shading">Sample Shading</string>
<string name="sample_shading_description">Allows the fragment shader to execute per sample in a multi-sampled fragment instead once per fragment. Improves graphics quality at the cost of some performance. Only Vulkan 1.1+ devices support this extension.</string>
<string name="sample_shading_fraction">Sample Shading Fraction</string>
<string name="sample_shading_fraction_description">The intensity of the sample shading pass. Higher values improve quality more but also reduce performance to a greater extent.</string>
<string name="sample_shading_fraction_description">Allows the fragment shader to execute per sample in a multi-sampled fragment instead of once per fragment. Improves graphics quality at the cost of performance. Higher values improve quality but degrade performance.</string>
<string name="veil_renderer">Renderer</string>
<string name="sync_memory_operations">Sync Memory Operations</string>

View File

@@ -514,6 +514,7 @@ struct Values {
SwitchableSetting<bool> barrier_feedback_loops{linkage, true, "barrier_feedback_loops",
Category::RendererAdvanced};
SwitchableSetting<u32, true> sample_shading_fraction{linkage, 0, 0, 100, "sample_shading_fraction", Category::RendererExtensions, Specialization::Scalar, true};
SwitchableSetting<u8, true> dyna_state{linkage,
#if defined (_WIN32)
3,
@@ -535,17 +536,6 @@ struct Values {
SwitchableSetting<bool> vertex_input_dynamic_state{linkage, true, "vertex_input_dynamic_state", Category::RendererExtensions};
SwitchableSetting<bool> provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions};
SwitchableSetting<bool> descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions};
SwitchableSetting<bool> sample_shading{linkage, false, "sample_shading", Category::RendererExtensions, Specialization::Paired};
SwitchableSetting<u32, true> sample_shading_fraction{linkage,
50,
0,
100,
"sample_shading_fraction",
Category::RendererExtensions,
Specialization::Scalar,
true,
false,
&sample_shading};
Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",

View File

@@ -49,7 +49,7 @@ Important API Changes in v6.x Series
Documentation
-------------
Design documentation can be found at [./Design.md](./Design.md).
Design documentation can be found at [docs/Design.md](docs/Design.md).
Usage Example

View File

@@ -343,279 +343,3 @@ SetTerm(IR::Term::If{cond, term_then, term_else})
This terminal instruction conditionally executes one terminal or another depending
on the run-time state of the ARM flags.
# Register Allocation (x64 Backend)
`HostLoc`s contain values. A `HostLoc` ("host value location") is either a host CPU register or a host spill location.
Values once set cannot be changed. Values can however be moved by the register allocator between `HostLoc`s. This is
handled by the register allocator itself and code that uses the register allocator need not and should not move values
between registers.
The register allocator is based on three concepts: `Use`, `Def` and `Scratch`.
* `Use`: The use of a value.
* `Define`: The definition of a value, this is the only time when a value is set.
* `Scratch`: Allocate a register that can be freely modified as one wishes.
Note that `Use`ing a value decrements its `use_count` by one. When the `use_count` reaches zero the value is discarded and no longer exists.
The member functions on `RegAlloc` are just a combination of the above concepts.
The following registers are reserved for internal use and should NOT participate in register allocation:
- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access.
- `%rsp`: Stack pointer.
- `%r15`: JIT pointer
- `%r14`: Page table pointer.
- `%r13`: Fastmem pointer.
The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate.
Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration.
### `Scratch`
```c++
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
```
At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.
### Pure `Use`
```c++
Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
```
At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by
which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
in an unused XMM register, `UseOpArg` might be in a register or might be a memory location, and `Use` allows
you to specify a specific register (GPR or XMM) to use.
This register **must not** have it's value changed.
### `UseScratch`
```c++
Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
```
At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by
which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
in an unused XMM register, and `UseScratch` allows you to specify a specific register (GPR or XMM) to use.
The return value is the register allocated to you.
You are free to modify the value in the register. The register is discarded at the end of the allocation scope.
### `Define` as register
A `Define` is the defintion of a value. This is the only time when a value may be set.
```c++
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
```
By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
value to the specified register `reg`.
### `Define`ing as an alias of a different value
Adding a `Define` to an existing value.
```c++
void DefineValue(IR::Inst* inst, Argument& arg);
```
You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
emitted.
## When to use each?
* Prefer `Use` to `UseScratch` where possible.
* Prefer the `OpArg` variants where possible.
* Prefer to **not** use the specific `HostLoc` variants where possible.
# Return Stack Buffer Optimization (x64 Backend)
One of the optimizations that dynarmic does is block-linking. Block-linking is done when
the destination address of a jump is available at JIT-time. Instead of returning to the
dispatcher at the end of a block we can perform block-linking: just jump directly to the
next block. This is beneficial because returning to the dispatcher can often be quite
expensive.
What should we do in cases when we can't predict the destination address? The eponymous
example is when executing a return statement at the end of a function; the return address
is not statically known at compile time.
We deal with this by using a return stack buffer: When we execute a call instruction,
we push our prediction onto the RSB. When we execute a return instruction, we pop a
prediction off the RSB. If the prediction is a hit, we immediately jump to the relevant
compiled block. Otherwise, we return to the dispatcher.
This is the essential idea behind this optimization.
## `UniqueHash`
One complication dynarmic has is that a compiled block is not uniquely identifiable by
the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
```c++
u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
```
## Our implementation isn't actually a stack
Dynarmic's RSB isn't actually a stack. It was implemented as a ring buffer because
that showed better performance in tests.
### RSB Structure
The RSB is implemented as a ring buffer. `rsb_ptr` is the index of the insertion
point. Each element in `rsb_location_descriptors` is a `UniqueHash` and they
each correspond to an element in `rsb_codeptrs`. `rsb_codeptrs` contains the
host addresses for the corresponding the compiled blocks.
`RSBSize` was chosen by performance testing. Note that this is bigger than the
size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
showed degraded performance.
```c++
struct JitState {
// ...
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
// ...
};
```
### RSB Push
We insert our prediction at the insertion point iff the RSB doesn't already
contain a prediction with the same `UniqueHash`.
```c++
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr(code, {HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(code);
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr(code).cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(loc_desc_reg, u64(imm64));
CodePtr patch_location = code->getCurr<CodePtr>();
patch_unique_hash_locations[imm64].emplace_back(patch_location);
code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
code->EnsurePatchLocationSize(patch_location, 10);
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}
code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
}
```
In pseudocode:
```c++
for (i := 0 .. RSBSize-1)
if (rsb_location_descriptors[i] == imm64)
goto label;
rsb_ptr++;
rsb_ptr %= RSBSize;
rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
label:
```
## RSB Pop
To check if a predicition is in the RSB, we linearly scan the RSB.
```c++
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}
code->jmp(rax);
}
```
In pseudocode:
```c++
rbx := ComputeUniqueHash()
rax := ReturnToDispatch
for (i := 0 .. RSBSize-1)
if (rbx == rsb_location_descriptors[i])
rax = rsb_codeptrs[i]
goto rax
```
# Fast memory (Fastmem)
The main way of accessing memory in JITed programs is via an invoked function, say "Read()" and "Write()". On our translator, such functions usually take a sizable amounts of code space (push + call + pop). Trash the i-cache (due to an indirect call) and overall make code emission more bloated.
The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls, while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throguhput.
Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only.
![Host to guest translation](./HostToGuest.svg)
![Fastmem translation](./Fastmem.svg)
In x86_64 for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler, the OS then will transform that into something that can be sent into userspace.
Most modern OSes implement kernel-page-table-isolation, which means a set of system calls will invoke a context switch (not often used syscalls), whereas others are handled by the same process address space (the smaller kernel portion, often used syscalls) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs).
Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: VDSO will take care of the entire thing in the same address space. Making dispatching as costly as an indirect call - without the hazards of increased code size.
The main downside from this is the constant i-cache trashing and pipeline hazards introduced by the VDSO signal handlers. However on most benchmarks fastmem does perform faster than without (Linux only). This also abuses the fact of continous address space emulation by using an arena - which can then be potentially transparently mapped into a hugepage, reducing TLB walk times.

2474
src/dynarmic/docs/Doxyfile Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,19 @@
# Fast memory (Fastmem)
The main way of accessing memory in JITed programs is via an invoked function, say "Read()" and "Write()". On our translator, such functions usually take a sizable amounts of code space (push + call + pop). Trash the i-cache (due to an indirect call) and overall make code emission more bloated.
The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls, while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throguhput.
Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only.
![Host to guest translation](./HostToGuest.svg)
![Fastmem translation](./Fastmem.svg)
In x86_64 for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler, the OS then will transform that into something that can be sent into userspace.
Most modern OSes implement kernel-page-table-isolation, which means a set of system calls will invoke a context switch (not often used syscalls), whereas others are handled by the same process address space (the smaller kernel portion, often used syscalls) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs).
Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: VDSO will take care of the entire thing in the same address space. Making dispatching as costly as an indirect call - without the hazards of increased code size.
The main downside from this is the constant i-cache trashing and pipeline hazards introduced by the VDSO signal handlers. However on most benchmarks fastmem does perform faster than without (Linux only). This also abuses the fact of continous address space emulation by using an arena - which can then be potentially transparently mapped into a hugepage, reducing TLB walk times.

View File

Before

Width:  |  Height:  |  Size: 128 KiB

After

Width:  |  Height:  |  Size: 128 KiB

View File

Before

Width:  |  Height:  |  Size: 98 KiB

After

Width:  |  Height:  |  Size: 98 KiB

View File

@@ -0,0 +1,97 @@
# Register Allocation (x64 Backend)
`HostLoc`s contain values. A `HostLoc` ("host value location") is either a host CPU register or a host spill location.
Values once set cannot be changed. Values can however be moved by the register allocator between `HostLoc`s. This is
handled by the register allocator itself and code that uses the register allocator need not and should not move values
between registers.
The register allocator is based on three concepts: `Use`, `Def` and `Scratch`.
* `Use`: The use of a value.
* `Define`: The definition of a value, this is the only time when a value is set.
* `Scratch`: Allocate a register that can be freely modified as one wishes.
Note that `Use`ing a value decrements its `use_count` by one. When the `use_count` reaches zero the value is discarded and no longer exists.
The member functions on `RegAlloc` are just a combination of the above concepts.
The following registers are reserved for internal use and should NOT participate in register allocation:
- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access.
- `%rsp`: Stack pointer.
- `%r15`: JIT pointer
- `%r14`: Page table pointer.
- `%r13`: Fastmem pointer.
The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate.
Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration.
### `Scratch`
```c++
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
```
At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.
### Pure `Use`
```c++
Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
```
At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by
which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
in an unused XMM register, `UseOpArg` might be in a register or might be a memory location, and `Use` allows
you to specify a specific register (GPR or XMM) to use.
This register **must not** have it's value changed.
### `UseScratch`
```c++
Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
```
At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by
which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
in an unused XMM register, and `UseScratch` allows you to specify a specific register (GPR or XMM) to use.
The return value is the register allocated to you.
You are free to modify the value in the register. The register is discarded at the end of the allocation scope.
### `Define` as register
A `Define` is the defintion of a value. This is the only time when a value may be set.
```c++
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
```
By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
value to the specified register `reg`.
### `Define`ing as an alias of a different value
Adding a `Define` to an existing value.
```c++
void DefineValue(IR::Inst* inst, Argument& arg);
```
You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
emitted.
## When to use each?
* Prefer `Use` to `UseScratch` where possible.
* Prefer the `OpArg` variants where possible.
* Prefer to **not** use the specific `HostLoc` variants where possible.

View File

@@ -0,0 +1,157 @@
# Return Stack Buffer Optimization (x64 Backend)
One of the optimizations that dynarmic does is block-linking. Block-linking is done when
the destination address of a jump is available at JIT-time. Instead of returning to the
dispatcher at the end of a block we can perform block-linking: just jump directly to the
next block. This is beneficial because returning to the dispatcher can often be quite
expensive.
What should we do in cases when we can't predict the destination address? The eponymous
example is when executing a return statement at the end of a function; the return address
is not statically known at compile time.
We deal with this by using a return stack buffer: When we execute a call instruction,
we push our prediction onto the RSB. When we execute a return instruction, we pop a
prediction off the RSB. If the prediction is a hit, we immediately jump to the relevant
compiled block. Otherwise, we return to the dispatcher.
This is the essential idea behind this optimization.
## `UniqueHash`
One complication dynarmic has is that a compiled block is not uniquely identifiable by
the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
```c++
u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
```
## Our implementation isn't actually a stack
Dynarmic's RSB isn't actually a stack. It was implemented as a ring buffer because
that showed better performance in tests.
### RSB Structure
The RSB is implemented as a ring buffer. `rsb_ptr` is the index of the insertion
point. Each element in `rsb_location_descriptors` is a `UniqueHash` and they
each correspond to an element in `rsb_codeptrs`. `rsb_codeptrs` contains the
host addresses for the corresponding the compiled blocks.
`RSBSize` was chosen by performance testing. Note that this is bigger than the
size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
showed degraded performance.
```c++
struct JitState {
// ...
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
// ...
};
```
### RSB Push
We insert our prediction at the insertion point iff the RSB doesn't already
contain a prediction with the same `UniqueHash`.
```c++
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr(code, {HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(code);
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr(code).cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(loc_desc_reg, u64(imm64));
CodePtr patch_location = code->getCurr<CodePtr>();
patch_unique_hash_locations[imm64].emplace_back(patch_location);
code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
code->EnsurePatchLocationSize(patch_location, 10);
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}
code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
}
```
In pseudocode:
```c++
for (i := 0 .. RSBSize-1)
if (rsb_location_descriptors[i] == imm64)
goto label;
rsb_ptr++;
rsb_ptr %= RSBSize;
rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
label:
```
## RSB Pop
To check if a predicition is in the RSB, we linearly scan the RSB.
```c++
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}
code->jmp(rax);
}
```
In pseudocode:
```c++
rbx := ComputeUniqueHash()
rax := ReturnToDispatch
for (i := 0 .. RSBSize-1)
if (rbx == rsb_location_descriptors[i])
rax = rsb_codeptrs[i]
goto rax
```

View File

@@ -324,6 +324,13 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent)
tr("Improves rendering of transparency effects in specific games."));
// Renderer (Extensions)
INSERT(Settings,
sample_shading_fraction,
tr("Sample Shading"),
tr("Allows the fragment shader to execute per sample in a multi-sampled fragment "
"instead of once per fragment. Improves graphics quality at the cost of performance.\n"
"Higher values improve quality but degrade performance."));
INSERT(Settings,
dyna_state,
tr("Extended Dynamic State"),
@@ -346,15 +353,6 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent)
tr("Improves texture & buffer handling and the Maxwell translation layer.\n"
"Some Vulkan 1.1+ and all 1.2+ devices support this extension."));
INSERT(Settings, sample_shading, QString(), QString());
INSERT(Settings,
sample_shading_fraction,
tr("Sample Shading"),
tr("Allows the fragment shader to execute per sample in a multi-sampled fragment "
"instead of once per fragment. Improves graphics quality at the cost of performance.\n"
"Higher values improve quality but degrade performance."));
// Renderer (Debug)
// System

View File

@@ -7,7 +7,6 @@
#pragma once
#include <algorithm>
#include <cstring>
#include <memory>
#include <numeric>
@@ -16,8 +15,6 @@
#include "video_core/guest_memory.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/texture_cache/util.h"
#include "video_core/polygon_mode_utils.h"
#include "video_core/renderer_vulkan/line_loop_utils.h"
namespace VideoCommon {
@@ -356,37 +353,14 @@ void BufferCache<P>::UpdateComputeBuffers() {
template <class P>
void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (is_indexed) {
BindHostIndexBuffer();
} else {
if constexpr (!P::IS_OPENGL) {
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
if (draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
polygon_mode == Maxwell::PolygonMode::Line && draw_state.vertex_buffer.count > 1) {
const u32 vertex_count = draw_state.vertex_buffer.count;
const u32 generated_count = vertex_count + 1;
const bool use_u16 = vertex_count <= 0x10000;
const u32 element_size = use_u16 ? sizeof(u16) : sizeof(u32);
auto staging = runtime.UploadStagingBuffer(
static_cast<size_t>(generated_count) * element_size);
std::span<u8> dst_span{staging.mapped_span.data(),
generated_count * static_cast<size_t>(element_size)};
Vulkan::LineLoop::GenerateSequentialWithClosureRaw(dst_span, element_size);
const auto synthetic_format = use_u16 ? Maxwell::IndexFormat::UnsignedShort
: Maxwell::IndexFormat::UnsignedInt;
runtime.BindIndexBuffer(draw_state.topology, synthetic_format,
draw_state.vertex_buffer.first, generated_count,
staging.buffer, static_cast<u32>(staging.offset),
generated_count * element_size);
}
}
if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
draw_state.vertex_buffer.count);
}
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
draw_state.vertex_buffer.count);
}
}
BindHostVertexBuffers();
@@ -715,44 +689,6 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
const u32 size = channel_state->index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if constexpr (!P::IS_OPENGL) {
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
const bool polygon_line =
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
polygon_mode == Maxwell::PolygonMode::Line;
if (polygon_line && draw_state.index_buffer.count > 1) {
const u32 element_size = draw_state.index_buffer.FormatSizeInBytes();
const size_t src_bytes = static_cast<size_t>(draw_state.index_buffer.count) * element_size;
const size_t total_bytes = src_bytes + element_size;
auto staging = runtime.UploadStagingBuffer(total_bytes);
std::span<u8> dst_span{staging.mapped_span.data(), total_bytes};
std::span<const u8> src_span;
if (!draw_state.inline_index_draw_indexes.empty()) {
const u8* const src =
draw_state.inline_index_draw_indexes.data() +
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
src_span = {src, src_bytes};
} else if (const u8* const cpu_base =
device_memory.GetPointer<u8>(channel_state->index_buffer.device_addr)) {
const u8* const src = cpu_base +
static_cast<size_t>(draw_state.index_buffer.first) * element_size;
src_span = {src, src_bytes};
} else {
const DAddr src_addr =
channel_state->index_buffer.device_addr +
static_cast<DAddr>(draw_state.index_buffer.first) * element_size;
device_memory.ReadBlockUnsafe(src_addr, dst_span.data(), src_bytes);
src_span = {dst_span.data(), src_bytes};
}
Vulkan::LineLoop::CopyWithClosureRaw(dst_span, src_span, element_size);
buffer.MarkUsage(offset, size);
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
draw_state.index_buffer.first, draw_state.index_buffer.count + 1,
staging.buffer, static_cast<u32>(staging.offset),
static_cast<u32>(total_bytes));
return;
}
}
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(size);

View File

@@ -1,46 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "video_core/engines/maxwell_3d.h"
namespace VideoCore {
inline Tegra::Engines::Maxwell3D::Regs::PolygonMode EffectivePolygonMode(
const Tegra::Engines::Maxwell3D::Regs& regs) {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
const bool cull_enabled = regs.gl_cull_test_enabled != 0;
const auto cull_face = regs.gl_cull_face;
const bool cull_front = cull_enabled && (cull_face == Maxwell::CullFace::Front ||
cull_face == Maxwell::CullFace::FrontAndBack);
const bool cull_back = cull_enabled && (cull_face == Maxwell::CullFace::Back ||
cull_face == Maxwell::CullFace::FrontAndBack);
const bool render_front = !cull_front;
const bool render_back = !cull_back;
const auto front_mode = regs.polygon_mode_front;
const auto back_mode = regs.polygon_mode_back;
if (render_front && render_back && front_mode != back_mode) {
if (front_mode == Maxwell::PolygonMode::Line || back_mode == Maxwell::PolygonMode::Line) {
return Maxwell::PolygonMode::Line;
}
if (front_mode == Maxwell::PolygonMode::Point || back_mode == Maxwell::PolygonMode::Point) {
return Maxwell::PolygonMode::Point;
}
}
if (render_front) {
return front_mode;
}
if (render_back) {
return back_mode;
}
return front_mode;
}
} // namespace VideoCore

View File

@@ -15,7 +15,6 @@
#include "video_core/engines/draw_manager.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/polygon_mode_utils.h"
namespace Vulkan {
namespace {
@@ -66,7 +65,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(VideoCore::EffectivePolygonMode(regs)));
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==

View File

@@ -1,68 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <algorithm>
#include <cstring>
#include <span>
#include "common/assert.h"
#include "common/common_types.h"
namespace Vulkan::LineLoop {
inline void CopyWithClosureRaw(std::span<u8> dst, std::span<const u8> src, size_t element_size) {
ASSERT_MSG(dst.size() == src.size() + element_size, "Invalid line loop copy sizes");
if (src.empty()) {
if (!dst.empty()) {
std::fill(dst.begin(), dst.end(), u8{0});
}
return;
}
std::memcpy(dst.data(), src.data(), src.size());
std::memcpy(dst.data() + src.size(), src.data(), element_size);
}
inline void GenerateSequentialWithClosureRaw(std::span<u8> dst, size_t element_size,
u64 start_value = 0) {
if (dst.empty()) {
return;
}
const size_t last = dst.size() - element_size;
size_t offset = 0;
u64 value = start_value;
while (offset < last) {
std::memcpy(dst.data() + offset, &value, element_size);
offset += element_size;
++value;
}
std::memcpy(dst.data() + offset, &start_value, element_size);
}
template <typename T>
inline void CopyWithClosure(std::span<T> dst, std::span<const T> src) {
ASSERT_MSG(dst.size() == src.size() + 1, "Invalid destination size for line loop copy");
if (src.empty()) {
if (!dst.empty()) {
dst.front() = {};
}
return;
}
std::copy(src.begin(), src.end(), dst.begin());
dst.back() = src.front();
}
template <typename T>
inline void GenerateSequentialWithClosure(std::span<T> dst, T start_value = {}) {
if (dst.empty()) {
return;
}
const size_t last = dst.size() - 1;
for (size_t i = 0; i < last; ++i) {
dst[i] = static_cast<T>(start_value + static_cast<T>(i));
}
dst.back() = start_value;
}
} // namespace Vulkan::LineLoop

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -326,9 +323,44 @@ VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
}
VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
Maxwell::PrimitiveTopology topology,
Maxwell::PolygonMode polygon_mode) {
return detail::PrimitiveTopologyNoDevice(topology, polygon_mode);
Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case Maxwell::PrimitiveTopology::LineLoop:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::TriangleStrip:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::Quads:
case Maxwell::PrimitiveTopology::QuadStrip:
// TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
// whenever it releases
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
case Maxwell::PrimitiveTopology::Polygon:
LOG_WARNING(Render_Vulkan, "Draw mode is Polygon with a polygon mode of lines should be a "
"single body and not a bunch of triangles.");
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
}
UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
return {};
}
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,

View File

@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -18,52 +15,6 @@ namespace Vulkan::MaxwellToVK {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
namespace detail {
constexpr VkPrimitiveTopology PrimitiveTopologyNoDevice(Maxwell::PrimitiveTopology topology,
Maxwell::PolygonMode polygon_mode) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case Maxwell::PrimitiveTopology::LineLoop:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::TriangleStrip:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;
case Maxwell::PrimitiveTopology::Quads:
case Maxwell::PrimitiveTopology::QuadStrip:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
case Maxwell::PrimitiveTopology::Polygon:
switch (polygon_mode) {
case Maxwell::PolygonMode::Fill:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PolygonMode::Line:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PolygonMode::Point:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
}
break;
}
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
}
} // namespace detail
namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter);
@@ -95,8 +46,7 @@ struct FormatInfo {
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology,
Maxwell::PolygonMode polygon_mode);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size);

View File

@@ -7,7 +7,6 @@
#include <algorithm>
#include <iostream>
#include <span>
#include <string_view>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
@@ -23,7 +22,6 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/polygon_mode_utils.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -616,10 +614,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
vertex_input_ci.pNext = &input_divisor_ci;
}
const bool has_tess_stages = spv_modules[1] || spv_modules[2];
const auto polygon_mode =
FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode.Value());
auto input_assembly_topology =
MaxwellToVK::PrimitiveTopology(device, key.state.topology, polygon_mode);
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
if (!has_tess_stages) {
LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
@@ -634,33 +629,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
}
}
if (key.state.topology == Maxwell::PrimitiveTopology::Polygon) {
const auto polygon_mode_name = [polygon_mode]() -> std::string_view {
switch (polygon_mode) {
case Maxwell::PolygonMode::Fill:
return "Fill";
case Maxwell::PolygonMode::Line:
return "Line";
case Maxwell::PolygonMode::Point:
return "Point";
}
return "Unknown";
}();
const auto vk_topology_name = [input_assembly_topology]() -> std::string_view {
switch (input_assembly_topology) {
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
return "TriangleFan";
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
return "LineStrip";
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
return "PointList";
default:
return "Unexpected";
}
}();
LOG_DEBUG(Render_Vulkan, "Polygon primitive in {} mode mapped to {}", polygon_mode_name,
vk_topology_name);
}
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -776,8 +744,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.sampleShadingEnable = Settings::values.sample_shading.GetValue() ? VK_TRUE : VK_FALSE,
.minSampleShading = static_cast<float>(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
.sampleShadingEnable = Settings::values.sample_shading_fraction.GetValue() > 0 ? VK_TRUE : VK_FALSE,
.minSampleShading = float(Settings::values.sample_shading_fraction.GetValue()) / 100.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = key.state.alpha_to_coverage_enabled != 0 ? VK_TRUE : VK_FALSE,
.alphaToOneEnable = key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,

View File

@@ -37,7 +37,6 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/polygon_mode_utils.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -149,8 +148,7 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3
return scissor;
}
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed,
Maxwell::PolygonMode polygon_mode) {
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {
DrawParams params{
.base_instance = draw_state.base_instance,
.num_instances = num_instances,
@@ -170,21 +168,6 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
params.base_vertex = 0;
params.is_indexed = true;
}
const bool polygon_line =
draw_state.topology == Maxwell::PrimitiveTopology::Polygon &&
polygon_mode == Maxwell::PolygonMode::Line;
if (polygon_line) {
if (params.is_indexed) {
if (draw_state.index_buffer.count > 1) {
params.num_vertices = draw_state.index_buffer.count + 1;
}
} else if (draw_state.vertex_buffer.count > 1) {
params.num_vertices = draw_state.vertex_buffer.count + 1;
params.is_indexed = true;
params.first_index = 0;
params.base_vertex = draw_state.vertex_buffer.first;
}
}
return params;
}
} // Anonymous namespace
@@ -250,8 +233,7 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const auto polygon_mode = VideoCore::EffectivePolygonMode(maxwell3d->regs);
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed, polygon_mode)};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,

View File

@@ -14,7 +14,7 @@ try {
Exit 1
}
$VulkanSDKVer = "1.4.328.1"
$VulkanSDKVer = "1.4.335.0"
$VULKAN_SDK = "C:/VulkanSDK/$VulkanSDKVer"
$ExeFile = "vulkansdk-windows-X64-$VulkanSDKVer.exe"
$Uri = "https://sdk.lunarg.com/sdk/download/$VulkanSDKVer/windows/$ExeFile"

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
# SPDX-License-Identifier: GPL-3.0-or-later
: "${VULKAN_SDK_VER:=1.4.328.1}"
: "${VULKAN_SDK_VER:=1.4.335.0}"
: "${VULKAN_ROOT:=C:/VulkanSDK/$VULKAN_SDK_VER}"
EXE_FILE="vulkansdk-windows-X64-$VULKAN_SDK_VER.exe"
URI="https://sdk.lunarg.com/sdk/download/$VULKAN_SDK_VER/windows/$EXE_FILE"