Voxia OS v0.0.1
Hobby Project Operating System Targeting x86-64
Loading...
Searching...
No Matches
simd.c
Go to the documentation of this file.
1#include "hal/cpu/cpuid.h"
2#include "init/init.h"
3#include <type.h>
4#include <libk/serial.h>
5#include <libk/simd.h>
6
7// TODO: move the SIMD capability flags (simd_has_avx, simd_has_avx2) into the per-core data structure
10
11void init_simd() {
12 uint32_t eax, ebx, ecx, edx;
13
14 // 1️⃣ Deteksi fitur dasar
15 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
16
17 bool has_xsave = (ecx & (1U << 26)) != 0;
18 bool has_avx = (ecx & (1U << 28)) != 0;
19 bool has_sse = (ecx & (1U << 25)) != 0;
20 bool has_oxsave = (ecx & (1U << 27)) != 0;
21
22 // 2️⃣ Deteksi AVX2 (Leaf 7, subleaf 0)
23 cpuid(7, 0, &eax, &ebx, &ecx, &edx);
24 bool has_avx2 = (ebx & (1U << 5)) != 0;
25
26 if (!has_sse) {
27 LOG2_WARN("SIMD", "SSE not supported, SIMD disabled");
28 simd_has_avx = false;
29 simd_has_avx2 = false;
30 return;
31 }
32
33 // AVX butuh OSXSAVE juga
34 if (has_avx && !has_oxsave)
35 has_avx = false;
36
37 if (has_avx2 && (!has_avx || !has_oxsave))
38 has_avx2 = false;
39
40 // 3️⃣ Aktifkan FPU dan SSE di CR0 & CR4
41 uint64_t cr0_val;
42 uint64_t cr4_val;
43
44 __asm__ volatile("mov %%cr0, %0" : "=r"(cr0_val));
45
46 cr0_val &= ~(1ULL << 2); // Clear EM bit
47 cr0_val |= (1ULL << 1); // Set MP bit
48
49 __asm__ volatile("mov %0, %%cr0" : : "r"(cr0_val));
50
51 __asm__ volatile("mov %%cr4, %0" : "=r"(cr4_val));
52
53 cr4_val |= (1ULL << 9); // OSFXSR
54 cr4_val |= (1ULL << 10); // OSXMMEXCPT
55
56 if (has_xsave)
57 cr4_val |= (1ULL << 18); // OSXSAVE
58
59 __asm__ volatile("mov %0, %%cr4" : : "r"(cr4_val));
60
61 // 4️⃣ Init FPU
62 __asm__ volatile("fninit");
63
64 // 5️⃣ Setup XCR0
65 if (has_xsave) {
66 uint64_t xcr0;
67
68 if (has_avx) {
69 // x87 + SSE + AVX
70 xcr0 = 0b111ULL;
71 simd_has_avx = true;
72 simd_has_avx2 = has_avx2;
73 } else {
74 // x87 + SSE
75 xcr0 = 0b011ULL;
76 simd_has_avx = false;
77 simd_has_avx2 = false;
78 }
79
80 __asm__ volatile("xsetbv"
81 :
82 : "a"((uint32_t) xcr0),
83 "d"((uint32_t) (xcr0 >> 32)), "c"(0)
84 : "memory");
85
86 } else {
87 LOG2_WARN("SIMD",
88 "XSAVE/AVX not supported, fallback to SSE only");
89
90 simd_has_avx = false;
91 simd_has_avx2 = false;
92 }
93
94 if (simd_has_avx2) {
95 LOG2_INFO("SIMD", "AVX2 enabled");
96 } else if (simd_has_avx) {
97 LOG2_INFO("SIMD", "AVX enabled");
98 }
99}
100
/*
 * Init hook for the SIMD subsystem: the body simply runs init_simd().
 * NOTE(review): INIT() is a project macro (init/init.h); how and when the
 * hook is invoked is defined there, not here.
 */
INIT(SIMD)
{
    init_simd();
}
104
/**
 * Packed double add: dst[i] = a[i] + b[i] for i = 0, 1.
 *
 * @param dst  destination, two doubles; may alias a or b (both inputs are
 *             loaded before the store).
 * @param a    first operand, two doubles.
 * @param b    second operand, two doubles.
 *
 * All three pointers must be 16-byte aligned: movapd raises #GP on a
 * misaligned address.
 */
void sse_add_pd(double* dst, const double* a, const double* b) {
    __asm__ volatile("movapd (%1), %%xmm0\n"     // load 2 doubles from a
                     "movapd (%2), %%xmm1\n"     // load 2 doubles from b
                     "addpd %%xmm1, %%xmm0\n"    // xmm0 = xmm0 + xmm1
                     "movapd %%xmm0, (%0)\n"     // store result to dst
                     :
                     : "r"(dst), "r"(a), "r"(b)
                     // "memory": the asm reads *a / *b and writes *dst behind
                     // the compiler's back.
                     : "xmm0", "xmm1", "memory");
}
114
/**
 * Packed double subtract: dst[i] = a[i] - b[i] for i = 0, 1.
 *
 * @param dst  destination, two doubles; may alias a or b (both inputs are
 *             loaded before the store).
 * @param a    minuend, two doubles.
 * @param b    subtrahend, two doubles.
 *
 * All three pointers must be 16-byte aligned: movapd raises #GP on a
 * misaligned address.
 */
void simd_sub_pd(double* dst, const double* a, const double* b) {
    __asm__ volatile("movapd (%1), %%xmm0\n"     // load 2 doubles from a
                     "movapd (%2), %%xmm1\n"     // load 2 doubles from b
                     "subpd %%xmm1, %%xmm0\n"    // xmm0 = xmm0 - xmm1
                     "movapd %%xmm0, (%0)\n"     // store result to dst
                     :
                     : "r"(dst), "r"(a), "r"(b)
                     // "memory": the asm reads *a / *b and writes *dst behind
                     // the compiler's back.
                     : "xmm0", "xmm1", "memory");
}
124
/**
 * Packed double multiply: dst[i] = a[i] * b[i] for i = 0, 1.
 *
 * @param dst  destination, two doubles; may alias a or b (both inputs are
 *             loaded before the store).
 * @param a    first factor, two doubles.
 * @param b    second factor, two doubles.
 *
 * All three pointers must be 16-byte aligned: movapd raises #GP on a
 * misaligned address.
 */
void simd_mul_pd(double* dst, const double* a, const double* b) {
    __asm__ volatile("movapd (%1), %%xmm0\n"     // load 2 doubles from a
                     "movapd (%2), %%xmm1\n"     // load 2 doubles from b
                     "mulpd %%xmm1, %%xmm0\n"    // xmm0 = xmm0 * xmm1
                     "movapd %%xmm0, (%0)\n"     // store result to dst
                     :
                     : "r"(dst), "r"(a), "r"(b)
                     // "memory": the asm reads *a / *b and writes *dst behind
                     // the compiler's back.
                     : "xmm0", "xmm1", "memory");
}
134// void simd_div(double *dst, const double *a, const double *b);
135
/**
 * Fused multiply-add on four packed doubles:
 * dst[i] = a[i] * b[i] + c[i] for i = 0..3.
 *
 * @param dst  destination, four doubles; may alias an input (all inputs are
 *             loaded before the store).
 * @param a    first factor, four doubles.
 * @param b    second factor, four doubles.
 * @param c    addend, four doubles.
 *
 * All four pointers must be 32-byte aligned (vmovapd on ymm registers).
 * The instructions used require AVX and FMA; this function does not check
 * for them, so the caller must ensure the CPU and OS state support both.
 */
void fma_mul_add_pd(double* dst, const double* a, const double* b,
                    const double* c) {
    __asm__ volatile("vmovapd (%1), %%ymm0\n"    // load a
                     "vmovapd (%2), %%ymm1\n"    // load b
                     "vmovapd (%3), %%ymm2\n"    // load c
                     // ymm0 = (ymm0 * ymm1) + ymm2
                     "vfmadd132pd %%ymm1, %%ymm2, %%ymm0\n"
                     "vmovapd %%ymm0, (%0)\n"    // store result
                     :
                     : "r"(dst), "r"(a), "r"(b), "r"(c)
                     // "memory": the asm reads the inputs and writes *dst
                     // behind the compiler's back.
                     : "ymm0", "ymm1", "ymm2", "memory");
}
148
/**
 * Fused multiply-subtract on four packed doubles:
 * dst[i] = a[i] * b[i] - c[i] for i = 0..3.
 *
 * @param dst  destination, four doubles; may alias an input (all inputs are
 *             loaded before the store).
 * @param a    first factor, four doubles.
 * @param b    second factor, four doubles.
 * @param c    value subtracted from the product, four doubles.
 *
 * All four pointers must be 32-byte aligned (vmovapd on ymm registers).
 * The instructions used require AVX and FMA; this function does not check
 * for them, so the caller must ensure the CPU and OS state support both.
 */
void fma_mul_sub_pd(double* dst, const double* a, const double* b,
                    const double* c) {
    __asm__ volatile("vmovapd (%1), %%ymm0\n"    // load a
                     "vmovapd (%2), %%ymm1\n"    // load b
                     "vmovapd (%3), %%ymm2\n"    // load c
                     // ymm0 = (ymm0 * ymm1) - ymm2
                     "vfmsub132pd %%ymm1, %%ymm2, %%ymm0\n"
                     "vmovapd %%ymm0, (%0)\n"    // store result
                     :
                     : "r"(dst), "r"(a), "r"(b), "r"(c)
                     // "memory": the asm reads the inputs and writes *dst
                     // behind the compiler's back.
                     : "ymm0", "ymm1", "ymm2", "memory");
}
161
162// void simd_fma_
163// }
void cpuid(uint32_t leaf, uint32_t subleaf, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
Definition cpuid.c:4
a
Definition entry.h:6
static ssfn_buf_t dst
Definition graphic.c:82
#define INIT(fn)
Definition init.h:26
#define LOG2_WARN(mod, fmt,...)
Definition serial.h:40
#define LOG2_INFO(mod, fmt,...)
Definition serial.h:33
boolean_t simd_has_avx2
Definition simd.c:9
void simd_mul_pd(double *dst, const double *a, const double *b)
Definition simd.c:125
void fma_mul_add_pd(double *dst, const double *a, const double *b, const double *c)
Definition simd.c:136
void simd_sub_pd(double *dst, const double *a, const double *b)
Definition simd.c:115
boolean_t simd_has_avx
Definition simd.c:8
void fma_mul_sub_pd(double *dst, const double *a, const double *b, const double *c)
Definition simd.c:149
void init_simd()
Definition simd.c:11
void sse_add_pd(double *dst, const double *a, const double *b)
Definition simd.c:105
unsigned int uint32_t
Definition type.h:19
uint8_t boolean_t
Definition type.h:89
unsigned long uint64_t
Definition type.h:25