1use crate::{
2 ConstCoeffBitWidth, EvaluatablePolynomial, EvaluationError, Polynomial,
3 univariate::{dense::DensePolynomial, prepare_projection},
4};
5use core::mem::MaybeUninit;
6use crypto_primitives::{PrimeField, Semiring, semiring::boolean::Boolean};
7use derive_more::{AsRef, Display};
8use num_traits::{CheckedAdd, CheckedMul, CheckedSub, One, Zero};
9use rand::{distr::StandardUniform, prelude::*};
10use std::{
11 array,
12 hash::Hash,
13 iter::{Product, Sum},
14 marker::PhantomData,
15 ops::{Add, AddAssign, Mul, MulAssign, Sub, SubAssign},
16};
17use zinc_transcript::delegate_const_transcribable;
18use zinc_utils::{
19 from_ref::FromRef,
20 inner_product::{InnerProduct, InnerProductError},
21 mul_by_scalar::MulByScalar,
22 named::Named,
23 projectable_to_field::ProjectableToField,
24};
25
/// A univariate polynomial with boolean coefficients, bit-packed into a
/// single `u64`: bit `i` holds the coefficient of `x^i`.
///
/// `DEGREE_PLUS_ONE` is the number of coefficients (degree bound + 1).
/// NOTE(review): nothing here enforces `DEGREE_PLUS_ONE <= 64`; confirm that
/// larger parameters are rejected upstream.
#[derive(AsRef, Clone, Debug, Default, Display, Hash, PartialEq, Eq)]
#[repr(transparent)]
pub struct BinaryU64Poly<const DEGREE_PLUS_ONE: usize>(u64); impl<const DEGREE_PLUS_ONE: usize> BinaryU64Poly<DEGREE_PLUS_ONE> {
    /// Borrows the underlying bit-packed coefficient word.
    #[inline(always)]
    pub const fn inner(&self) -> &u64 {
        &self.0
    }
}
36
37impl<const DEGREE_PLUS_ONE: usize> From<BinaryU64Poly<DEGREE_PLUS_ONE>> for u64 {
38 #[inline(always)]
39 fn from(binary_poly: BinaryU64Poly<DEGREE_PLUS_ONE>) -> Self {
40 binary_poly.0
41 }
42}
43
44impl<const DEGREE_PLUS_ONE: usize> From<BinaryU64Poly<DEGREE_PLUS_ONE>>
45 for DensePolynomial<Boolean, DEGREE_PLUS_ONE>
46{
47 #[inline(always)]
48 fn from(binary_poly: BinaryU64Poly<DEGREE_PLUS_ONE>) -> Self {
49 DensePolynomial {
50 coeffs: array::from_fn(|i| Boolean::new(!(binary_poly.0 & (1 << i)).is_zero())),
51 }
52 }
53}
54
55impl From<u32> for BinaryU64Poly<32> {
56 fn from(value: u32) -> Self {
57 Self(u64::from(value)) }
59}
60
61impl From<u64> for BinaryU64Poly<64> {
62 fn from(value: u64) -> Self {
63 Self(value) }
65}
66
67impl<const DEGREE_PLUS_ONE: usize> BinaryU64Poly<DEGREE_PLUS_ONE> {
68 #[inline(always)]
73 pub fn new(coeffs: impl AsRef<[Boolean]>) -> Self {
74 let coeffs = coeffs.as_ref();
76 assert!(coeffs.len() <= DEGREE_PLUS_ONE);
77 let mut value: u64 = 0;
78 for (i, coeff) in coeffs.iter().enumerate() {
79 if coeff.inner() {
80 value |= 1 << i;
81 }
82 }
83 Self(value)
84 }
85
86 #[inline(always)]
91 pub fn new_padded(coeffs: impl AsRef<[Boolean]>) -> Self {
92 let coeffs = coeffs.as_ref();
93 assert!(coeffs.len() <= DEGREE_PLUS_ONE);
94 let mut value: u64 = 0;
95 for (i, coeff) in coeffs.iter().enumerate() {
96 if coeff.inner() {
97 value |= 1 << i;
98 }
99 }
100 Self(value)
101 }
102}
103
104impl<const DEGREE_PLUS_ONE: usize> Zero for BinaryU64Poly<DEGREE_PLUS_ONE> {
105 #[inline(always)]
106 fn zero() -> Self {
107 Self(0)
108 }
109
110 #[inline(always)]
111 fn is_zero(&self) -> bool {
112 self.0.is_zero()
113 }
114}
115
116impl<const DEGREE_PLUS_ONE: usize> One for BinaryU64Poly<DEGREE_PLUS_ONE> {
117 #[inline(always)]
118 fn one() -> Self {
119 Self(1)
120 }
121}
122
123impl<'a, const DEGREE_PLUS_ONE: usize> Add<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
124 type Output = Self;
125
126 #[allow(clippy::arithmetic_side_effects, clippy::suspicious_arithmetic_impl)]
127 #[inline(always)]
128 fn add(self, rhs: &'a Self) -> Self::Output {
129 Self(self.0 ^ rhs.0)
131 }
132}
133
134impl<const DEGREE_PLUS_ONE: usize> Add for BinaryU64Poly<DEGREE_PLUS_ONE> {
135 type Output = Self;
136
137 #[allow(clippy::arithmetic_side_effects)]
138 #[inline(always)]
139 fn add(self, rhs: Self) -> Self::Output {
140 self.add(&rhs)
141 }
142}
143
144impl<'a, const DEGREE_PLUS_ONE: usize> Sub<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
145 type Output = Self;
146
147 #[allow(clippy::arithmetic_side_effects, clippy::suspicious_arithmetic_impl)]
148 #[inline(always)]
149 fn sub(self, rhs: &'a Self) -> Self::Output {
150 Self(self.0 ^ rhs.0)
152 }
153}
154
155impl<const DEGREE_PLUS_ONE: usize> Sub for BinaryU64Poly<DEGREE_PLUS_ONE> {
156 type Output = Self;
157
158 #[allow(clippy::arithmetic_side_effects)]
159 #[inline(always)]
160 fn sub(self, rhs: Self) -> Self::Output {
161 self.sub(&rhs)
162 }
163}
164
impl<const DEGREE_PLUS_ONE: usize> Mul for BinaryU64Poly<DEGREE_PLUS_ONE> {
    type Output = Self;

    /// Delegates to the by-reference impl below, which currently panics.
    #[allow(clippy::arithmetic_side_effects)]
    #[inline(always)]
    fn mul(self, rhs: Self) -> Self::Output {
        self.mul(&rhs)
    }
}

impl<'a, const DEGREE_PLUS_ONE: usize> Mul<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
    type Output = Self;

    /// Polynomial multiplication is not yet implemented; calling this
    /// always panics.
    #[allow(clippy::arithmetic_side_effects)]
    #[inline(always)]
    fn mul(self, _rhs: &'a Self) -> Self::Output {
        unimplemented!("Multiplication for BinaryU64Poly is not implemented yet");
    }
}
184
185#[allow(clippy::suspicious_op_assign_impl)]
186impl<'a, const DEGREE_PLUS_ONE: usize> AddAssign<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
187 #[inline(always)]
188 fn add_assign(&mut self, rhs: &'a Self) {
189 self.0 ^= rhs.0;
191 }
192}
193
194impl<const DEGREE_PLUS_ONE: usize> AddAssign for BinaryU64Poly<DEGREE_PLUS_ONE> {
195 #[inline(always)]
196 fn add_assign(&mut self, rhs: Self) {
197 self.add_assign(&rhs);
198 }
199}
200
201#[allow(clippy::suspicious_op_assign_impl)]
202impl<'a, const DEGREE_PLUS_ONE: usize> SubAssign<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
203 #[inline(always)]
204 fn sub_assign(&mut self, rhs: &'a Self) {
205 self.0 ^= rhs.0;
207 }
208}
209
210impl<const DEGREE_PLUS_ONE: usize> SubAssign for BinaryU64Poly<DEGREE_PLUS_ONE> {
211 #[inline(always)]
212 fn sub_assign(&mut self, rhs: Self) {
213 self.sub_assign(&rhs);
214 }
215}
216
impl<const DEGREE_PLUS_ONE: usize> MulAssign for BinaryU64Poly<DEGREE_PLUS_ONE> {
    /// In-place multiplication is not yet implemented; always panics.
    #[inline(always)]
    fn mul_assign(&mut self, _rhs: Self) {
        unimplemented!("Multiplication for BinaryU64Poly is not implemented yet");
    }
}

impl<'a, const DEGREE_PLUS_ONE: usize> MulAssign<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
    /// By-reference variant of the unimplemented in-place multiplication.
    #[inline(always)]
    fn mul_assign(&mut self, _rhs: &'a Self) {
        unimplemented!("Multiplication for BinaryU64Poly is not implemented yet");
    }
}
231
232impl<const DEGREE_PLUS_ONE: usize> CheckedAdd for BinaryU64Poly<DEGREE_PLUS_ONE> {
233 #[inline(always)]
234 fn checked_add(&self, other: &Self) -> Option<Self> {
235 if (self.0 & other.0) != 0 {
237 None
238 } else {
239 Some(Self(self.0 ^ other.0))
241 }
242 }
243}
244
245impl<const DEGREE_PLUS_ONE: usize> CheckedSub for BinaryU64Poly<DEGREE_PLUS_ONE> {
246 #[inline(always)]
247 fn checked_sub(&self, other: &Self) -> Option<Self> {
248 if ((!self.0) & other.0) != 0 {
250 None
252 } else {
253 Some(Self(self.0 ^ other.0))
255 }
256 }
257}
258
impl<const DEGREE_PLUS_ONE: usize> CheckedMul for BinaryU64Poly<DEGREE_PLUS_ONE> {
    /// Checked multiplication is not yet implemented; always panics
    /// (it does not return `None`).
    #[inline(always)]
    fn checked_mul(&self, _other: &Self) -> Option<Self> {
        unimplemented!("Checked multiplication for BinaryU64Poly is not implemented yet");
    }
}
265
266impl<'a, const DEGREE_PLUS_ONE: usize> Sum<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
267 #[inline(always)]
268 fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
269 iter.sum()
270 }
271}
272
273impl<'a, const DEGREE_PLUS_ONE: usize> Product<&'a Self> for BinaryU64Poly<DEGREE_PLUS_ONE> {
274 #[inline(always)]
275 fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
276 iter.product()
277 }
278}
279
// Marker impl: declares that `BinaryU64Poly` satisfies the `Semiring`
// contract (backed by the `Zero`/`One`/`Add`/`Mul` impls above).
impl<const DEGREE_PLUS_ONE: usize> Semiring for BinaryU64Poly<DEGREE_PLUS_ONE> {}
281
282impl<const DEGREE_PLUS_ONE: usize> Distribution<BinaryU64Poly<DEGREE_PLUS_ONE>>
283 for StandardUniform
284{
285 #[inline(always)]
286 #[allow(clippy::arithmetic_side_effects)]
287 fn sample<Gen: Rng + ?Sized>(&self, rng: &mut Gen) -> BinaryU64Poly<DEGREE_PLUS_ONE> {
288 let mut value: u64 = rng.next_u64();
289 if DEGREE_PLUS_ONE < 64 {
290 value &= (1u64 << DEGREE_PLUS_ONE) - 1;
291 }
292 BinaryU64Poly::<DEGREE_PLUS_ONE>(value)
293 }
294}
295
impl<const DEGREE_PLUS_ONE: usize> Polynomial<Boolean> for BinaryU64Poly<DEGREE_PLUS_ONE> {
    // Reuse the dense boolean polynomial's bound so both representations agree.
    const DEGREE_BOUND: usize = DensePolynomial::<Boolean, DEGREE_PLUS_ONE>::DEGREE_BOUND;
}
302
303#[allow(clippy::arithmetic_side_effects)]
304impl<R: Clone + Zero + One + CheckedAdd + CheckedMul, const DEGREE_PLUS_ONE: usize>
305 EvaluatablePolynomial<Boolean, R> for BinaryU64Poly<DEGREE_PLUS_ONE>
306{
307 type EvaluationPoint = R;
308
309 fn evaluate_at_point(&self, point: &R) -> Result<R, EvaluationError> {
310 if DEGREE_PLUS_ONE.is_one() {
311 return Ok(R::zero());
312 }
313
314 let mut result = R::zero();
315 let mut pow = R::one();
316 for i in 0..DEGREE_PLUS_ONE {
317 if (self.0 & (1 << i)) != 0 {
318 result = result.checked_add(&pow).ok_or(EvaluationError::Overflow)?;
319 }
320 if i + 1 < DEGREE_PLUS_ONE {
321 pow = pow.checked_mul(point).ok_or(EvaluationError::Overflow)?;
322 }
323 }
324
325 Ok(result)
326 }
327}
328
impl<const DEGREE_PLUS_ONE: usize> ConstCoeffBitWidth for BinaryU64Poly<DEGREE_PLUS_ONE> {
    // Same per-coefficient bit width as the dense boolean polynomial.
    const COEFF_BIT_WIDTH: usize = DensePolynomial::<Boolean, DEGREE_PLUS_ONE>::COEFF_BIT_WIDTH;
}

impl<const DEGREE_PLUS_ONE: usize> Named for BinaryU64Poly<DEGREE_PLUS_ONE> {
    /// Short human-readable type tag, parameterized by the degree bound.
    fn type_name() -> String {
        format!("BPoly<{}>", Self::DEGREE_BOUND)
    }
}
338
// Delegate the constant-size transcript encoding to the wrapped `u64`
// (the tests below confirm `NUM_BYTES == u64::NUM_BYTES`).
delegate_const_transcribable!(BinaryU64Poly<const DEGREE_PLUS_ONE: usize>(u64));
340
341impl<const DEGREE_PLUS_ONE: usize> FromRef<BinaryU64Poly<DEGREE_PLUS_ONE>>
342 for BinaryU64Poly<DEGREE_PLUS_ONE>
343{
344 #[inline(always)]
345 fn from_ref(poly: &BinaryU64Poly<DEGREE_PLUS_ONE>) -> Self {
346 poly.clone()
347 }
348}
349
350impl<const DEGREE_PLUS_ONE: usize> From<&BinaryU64Poly<DEGREE_PLUS_ONE>>
351 for BinaryU64Poly<DEGREE_PLUS_ONE>
352{
353 #[inline(always)]
354 fn from(value: &BinaryU64Poly<DEGREE_PLUS_ONE>) -> Self {
355 Self::from_ref(value)
356 }
357}
358
/// Iterator over the `DEGREE_PLUS_ONE` boolean coefficients of a
/// [`BinaryU64Poly`], yielded from lowest degree to highest.
pub struct BinaryU64PolyIter<'a, const DEGREE_PLUS_ONE: usize> {
    // Index of the next coefficient to yield.
    i: usize,
    poly: &'a BinaryU64Poly<DEGREE_PLUS_ONE>,
}

impl<'a, const DEGREE_PLUS_ONE: usize> BinaryU64PolyIter<'a, DEGREE_PLUS_ONE> {
    /// Creates an iterator positioned at the constant coefficient.
    pub fn new(poly: &'a BinaryU64Poly<DEGREE_PLUS_ONE>) -> Self {
        Self { i: 0, poly }
    }
}
369
370impl<'a, const DEGREE_PLUS_ONE: usize> Iterator for BinaryU64PolyIter<'a, DEGREE_PLUS_ONE> {
371 type Item = Boolean;
372
373 #[allow(clippy::arithmetic_side_effects)]
374 fn next(&mut self) -> Option<Self::Item> {
375 if self.i < DEGREE_PLUS_ONE {
376 let i = self.i;
377
378 self.i += 1;
379
380 Some((!(self.poly.0 & (1 << i)).is_zero()).into())
381 } else {
382 None
383 }
384 }
385}
386
387impl<const DEGREE_PLUS_ONE: usize> BinaryU64Poly<DEGREE_PLUS_ONE> {
388 pub fn iter(&self) -> BinaryU64PolyIter<'_, DEGREE_PLUS_ONE> {
389 BinaryU64PolyIter::new(self)
390 }
391}
392
/// Inner-product strategy pairing a bit-packed [`BinaryU64Poly`] with a
/// slice of `R` values; only positions with a set bit contribute a term.
#[derive(Clone, Debug)]
pub struct BinaryU64PolyInnerProduct<R, const DEGREE_PLUS_ONE: usize>(PhantomData<R>);
395
396impl<Rhs, Out, const DEGREE_PLUS_ONE: usize> InnerProduct<BinaryU64Poly<DEGREE_PLUS_ONE>, Rhs, Out>
397 for BinaryU64PolyInnerProduct<Rhs, DEGREE_PLUS_ONE>
398where
399 Rhs: Clone,
400 Out: FromRef<Rhs> + CheckedAdd,
401{
402 #[inline(always)]
403 #[allow(clippy::arithmetic_side_effects)] fn inner_product<const CHECK: bool>(
405 lhs: &BinaryU64Poly<DEGREE_PLUS_ONE>,
406 rhs: &[Rhs],
407 zero: Out,
408 ) -> Result<Out, InnerProductError> {
409 if rhs.len() != DEGREE_PLUS_ONE {
410 return Err(InnerProductError::LengthMismatch {
411 lhs: DEGREE_PLUS_ONE,
412 rhs: rhs.len(),
413 });
414 }
415
416 let mut acc = zero;
417 let mut bits = lhs.0;
418 while bits != 0 {
419 let i = bits.trailing_zeros() as usize;
420 let rhs = Out::from_ref(&rhs[i]);
421 if CHECK {
422 acc = acc.checked_add(&rhs).ok_or(InnerProductError::Overflow)?;
423 } else {
424 acc = acc + rhs;
425 }
426 bits &= bits - 1;
428 }
429
430 Ok(acc)
431 }
432}
433
434impl<F, const DEGREE_PLUS_ONE: usize> ProjectableToField<F> for BinaryU64Poly<DEGREE_PLUS_ONE>
435where
436 F: PrimeField + FromRef<F> + 'static,
437{
438 fn prepare_projection(sampled_value: &F) -> impl Fn(&Self) -> F + 'static {
439 prepare_projection::<F, Self, _, DEGREE_PLUS_ONE>(sampled_value, |poly, i| {
440 (poly.0 & (1 << i)) != 0
441 })
442 }
443}
444
impl<const DEGREE_PLUS_ONE: usize> MulByScalar<&i64, DensePolynomial<i64, DEGREE_PLUS_ONE>>
    for BinaryU64Poly<DEGREE_PLUS_ONE>
{
    /// Multiplies every boolean coefficient by `rhs`, widening into a dense
    /// `i64` polynomial: each output coefficient is either `0` or `*rhs`.
    ///
    /// `CHECK` is ignored: the widening only selects between `0` and the
    /// scalar (a bitwise AND with a full mask), so no arithmetic can
    /// overflow and this always returns `Some`.
    fn mul_by_scalar<const CHECK: bool>(
        &self,
        rhs: &i64,
    ) -> Option<DensePolynomial<i64, DEGREE_PLUS_ONE>> {
        Some(widen_simd::<DEGREE_PLUS_ONE>(self, *rhs))
    }
}
455
/// Widens a bit-packed boolean polynomial into a dense `i64` polynomial:
/// writes `scalar` where a coefficient bit is set and `0` elsewhere, using a
/// SIMD fill kernel for the current target.
///
/// # Panics
/// Panics at runtime on targets that are neither aarch64 nor x86_64 with
/// AVX-512F — no scalar fallback is compiled in.
#[allow(unreachable_code, unused_variables)] #[inline(always)]
pub fn widen_simd<const DEGREE_PLUS_ONE: usize>(
    poly: &BinaryU64Poly<DEGREE_PLUS_ONE>,
    scalar: i64,
) -> DensePolynomial<i64, DEGREE_PLUS_ONE> {
    let mut coeffs_uninit = MaybeUninit::<[i64; DEGREE_PLUS_ONE]>::uninit();
    let out_ptr = coeffs_uninit.as_mut_ptr() as *mut i64;

    // SAFETY: the selected fill kernel writes all DEGREE_PLUS_ONE slots
    // behind `out_ptr` (SIMD main loop plus scalar tail).
    #[cfg(target_arch = "aarch64")]
    unsafe {
        widen_fill_neon::<DEGREE_PLUS_ONE>(&poly.0, out_ptr, scalar);
    }
    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
    unsafe {
        widen_fill_avx512::<DEGREE_PLUS_ONE>(&poly.0, out_ptr, scalar);
    }
    #[cfg(not(any(
        target_arch = "aarch64",
        all(target_arch = "x86_64", target_feature = "avx512f")
    )))]
    {
        panic!("SIMD widening not supported on this architecture");
    }

    // SAFETY: only reachable after one of the kernels above fully
    // initialized the array (the unsupported-target branch panics first).
    let coeffs = unsafe { coeffs_uninit.assume_init() };
    DensePolynomial { coeffs }
}
484
/// NEON kernel for [`widen_simd`]: expands each bit of `*mask_ref` into an
/// `i64` slot holding `scalar` (bit set) or `0` (bit clear), writing `N`
/// slots starting at `out_ptr`.
///
/// # Safety
/// `out_ptr` must be valid for writes of `N` `i64` values; all `N` slots are
/// initialized on return.
#[allow(
    clippy::arithmetic_side_effects,
    clippy::cast_possible_truncation,
    clippy::cast_lossless
)]
#[allow(unsafe_op_in_unsafe_fn)]
#[cfg(target_arch = "aarch64")]
#[inline(always)]
unsafe fn widen_fill_neon<const N: usize>(mask_ref: &u64, out_ptr: *mut i64, scalar: i64) {
    use core::arch::aarch64::*;

    let mask64: u64 = *mask_ref;

    // Broadcast the scalar into both 64-bit lanes once, up front.
    let scalar_v: int64x2_t = vdupq_n_s64(scalar);

    let mut i = 0usize;

    if N == 64 {
        // Full-width path: map each mask byte through a precomputed table of
        // eight all-ones/all-zeros i64 lane masks.
        #[repr(align(64))]
        struct LookupTable([[i64; 8]; 256]);

        // Built at compile time via const evaluation of the initializer.
        static LUT: LookupTable = {
            let mut table = [[0i64; 8]; 256];
            let mut i = 0;
            while i < 256 {
                let mut bit = 0;
                while bit < 8 {
                    table[i][bit] = if (i & (1 << bit)) != 0 { -1 } else { 0 };
                    bit += 1;
                }
                i += 1;
            }
            LookupTable(table)
        };

        // Process 8 coefficients (one mask byte) per iteration.
        while i + 8 <= N {
            let shift = i as u32;
            let byte: u8 = ((mask64 >> shift) & 0xFF) as u8;

            let masks_ptr = LUT.0[byte as usize].as_ptr();
            let m0: int64x2_t = vld1q_s64(masks_ptr);
            let m1: int64x2_t = vld1q_s64(masks_ptr.add(2));
            let m2: int64x2_t = vld1q_s64(masks_ptr.add(4));
            let m3: int64x2_t = vld1q_s64(masks_ptr.add(6));

            // AND with an all-ones/all-zeros mask selects scalar or zero.
            vst1q_s64(out_ptr.add(i), vandq_s64(scalar_v, m0));
            vst1q_s64(out_ptr.add(i + 2), vandq_s64(scalar_v, m1));
            vst1q_s64(out_ptr.add(i + 4), vandq_s64(scalar_v, m2));
            vst1q_s64(out_ptr.add(i + 6), vandq_s64(scalar_v, m3));

            i += 8;
        }
    } else {
        // General path: derive the lane masks in-register instead of via LUT.
        const BIT_MASKS: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
        let bit_masks: uint8x8_t = vld1_u8(BIT_MASKS.as_ptr());
        let zero_u8: uint8x8_t = vdup_n_u8(0);

        while i + 8 <= N {
            let shift = i as u32;
            let byte: u8 = ((mask64 >> shift) & 0xFF) as u8;

            // Isolate each bit of the byte, compare against zero to get
            // per-lane 0x00/0xFF masks, then sign-extend u8 -> i64.
            let vbyte: uint8x8_t = vdup_n_u8(byte);
            let selected: uint8x8_t = vand_u8(vbyte, bit_masks);
            let nz: uint8x8_t = vcgt_u8(selected, zero_u8); let nz_signed: int8x8_t = vreinterpret_s8_u8(nz); let s16: int16x8_t = vmovl_s8(nz_signed); let lo32: int32x4_t = vmovl_s16(vget_low_s16(s16));
            let hi32: int32x4_t = vmovl_s16(vget_high_s16(s16));

            let m0: int64x2_t = vmovl_s32(vget_low_s32(lo32)); let m1: int64x2_t = vmovl_s32(vget_high_s32(lo32));
            let m2: int64x2_t = vmovl_s32(vget_low_s32(hi32));
            let m3: int64x2_t = vmovl_s32(vget_high_s32(hi32));

            vst1q_s64(out_ptr.add(i), vandq_s64(scalar_v, m0));
            vst1q_s64(out_ptr.add(i + 2), vandq_s64(scalar_v, m1));
            vst1q_s64(out_ptr.add(i + 4), vandq_s64(scalar_v, m2));
            vst1q_s64(out_ptr.add(i + 6), vandq_s64(scalar_v, m3));

            i += 8;
        }
    }

    // Scalar tail for the final N % 8 coefficients.
    while i < N {
        let bit = ((mask64 >> i) & 1) != 0;
        let mask = -(bit as i64); *out_ptr.add(i) = scalar & mask;
        i += 1;
    }
}
592
/// AVX-512 kernel for [`widen_simd`]: expands each bit of `*mask_ref` into
/// an `i64` slot holding `scalar` (bit set) or `0` (bit clear), writing `N`
/// slots starting at `out_ptr`.
///
/// # Safety
/// `out_ptr` must be valid for writes of `N` `i64` values; all `N` slots are
/// initialized on return.
#[allow(
    clippy::arithmetic_side_effects,
    clippy::cast_possible_truncation,
    clippy::cast_lossless
)]
#[allow(unsafe_op_in_unsafe_fn)]
#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
#[inline(always)]
unsafe fn widen_fill_avx512<const N: usize>(mask_ref: &u64, out_ptr: *mut i64, scalar: i64) {
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    let mask64: u64 = *mask_ref;
    let mut i = 0usize;

    // Main loop: 32 coefficients (four mask bytes) per iteration, each byte
    // driving one masked broadcast into a 512-bit register.
    while i + 32 <= N {
        let shift = i as u32;

        let byte0: u8 = ((mask64 >> shift) & 0xFF) as u8;
        let byte1: u8 = ((mask64 >> (shift + 8)) & 0xFF) as u8;
        let byte2: u8 = ((mask64 >> (shift + 16)) & 0xFF) as u8;
        let byte3: u8 = ((mask64 >> (shift + 24)) & 0xFF) as u8;

        // A mask byte is already the 8-lane predicate the intrinsic expects.
        let kmask0: __mmask8 = byte0;
        let kmask1: __mmask8 = byte1;
        let kmask2: __mmask8 = byte2;
        let kmask3: __mmask8 = byte3;

        // maskz_set1: lane gets `scalar` where the predicate bit is set, 0 elsewhere.
        let result0: __m512i = _mm512_maskz_set1_epi64(kmask0, scalar);
        let result1: __m512i = _mm512_maskz_set1_epi64(kmask1, scalar);
        let result2: __m512i = _mm512_maskz_set1_epi64(kmask2, scalar);
        let result3: __m512i = _mm512_maskz_set1_epi64(kmask3, scalar);

        _mm512_storeu_si512(out_ptr.add(i) as *mut __m512i, result0);
        _mm512_storeu_si512(out_ptr.add(i + 8) as *mut __m512i, result1);
        _mm512_storeu_si512(out_ptr.add(i + 16) as *mut __m512i, result2);
        _mm512_storeu_si512(out_ptr.add(i + 24) as *mut __m512i, result3);

        i += 32;
    }

    // Cleanup loop: 8 coefficients (one mask byte) per iteration.
    while i + 8 <= N {
        let shift = i as u32;
        let byte: u8 = ((mask64 >> shift) & 0xFF) as u8;
        let kmask: __mmask8 = byte;
        let result: __m512i = _mm512_maskz_set1_epi64(kmask, scalar);
        _mm512_storeu_si512(out_ptr.add(i) as *mut __m512i, result);
        i += 8;
    }

    // Scalar tail for the final N % 8 coefficients.
    while i < N {
        let bit = ((mask64 >> i) & 1) != 0;
        let mask = -(bit as i64); *out_ptr.add(i) = scalar & mask;
        i += 1;
    }
}
662
#[cfg(test)]
mod tests {
    use crate::univariate::binary_ref::BinaryRefPoly;
    use zinc_transcript::traits::{ConstTranscribable, GenTranscribable};
    use zinc_utils::CHECKED;

    use super::*;

    /// Evaluating at the point 2 must reproduce the packed bit pattern
    /// itself, since coefficient `i` contributes `2^i`.
    #[test]
    fn evaluate_is_correct() {
        for i in 0..16 {
            let poly = BinaryU64Poly::<4>::new([
                (i & 0b0001 != 0).into(),
                (i & 0b0010 != 0).into(),
                (i & 0b0100 != 0).into(),
                (i & 0b1000 != 0).into(),
            ]);

            let result = poly.evaluate_at_point(&2).unwrap();

            assert_eq!(result, i);
        }
    }

    /// Transcript serialization round-trips and uses the same byte count as
    /// the underlying `u64`.
    #[test]
    fn transcribable() {
        let original =
            BinaryU64Poly::<4>::new([true.into(), false.into(), true.into(), false.into()]);
        let mut bytes = [0u8; BinaryU64Poly::<4>::NUM_BYTES];
        assert_eq!(bytes.len(), u64::NUM_BYTES);

        original.write_transcription_bytes_exact(&mut bytes);
        let deserialized = BinaryU64Poly::<4>::read_transcription_bytes_exact(&bytes);
        assert_eq!(original, deserialized);
    }

    /// Scalar reference implementation of the widening: coefficient `i` is
    /// `scalar` when bit `i` is set, `0` otherwise.
    fn widen_ref<const DEGREE_PLUS_ONE: usize>(
        poly: &BinaryU64Poly<DEGREE_PLUS_ONE>,
        scalar: i64,
    ) -> DensePolynomial<i64, DEGREE_PLUS_ONE> {
        let mut coeffs: [i64; DEGREE_PLUS_ONE] = [0; DEGREE_PLUS_ONE];
        for (i, coeff) in coeffs.iter_mut().enumerate().take(DEGREE_PLUS_ONE) {
            if (poly.0 & (1 << i)) != 0 {
                *coeff = scalar;
            }
        }
        DensePolynomial { coeffs }
    }

    /// Cross-checks the SIMD widening against the scalar reference and the
    /// `BinaryRefPoly` implementation, over all 4-bit patterns, a 32-bit
    /// pattern, and all-zeros/all-ones edge cases (including extreme scalars).
    #[ignore = "CI system does not support SIMD features"]
    #[test]
    fn widen_ref_and_widen_ref_simd_match() {
        for i in 0..16 {
            let poly = BinaryU64Poly::<4>::new([
                (i & 0b0001 != 0).into(),
                (i & 0b0010 != 0).into(),
                (i & 0b0100 != 0).into(),
                (i & 0b1000 != 0).into(),
            ]);

            let poly_ref = BinaryRefPoly::<4>::new([
                (i & 0b0001 != 0).into(),
                (i & 0b0010 != 0).into(),
                (i & 0b0100 != 0).into(),
                (i & 0b1000 != 0).into(),
            ]);

            for scalar in [1, 42, -7, 100, -100, i64::MAX, i64::MIN] {
                let result_simd_ref = widen_ref(&poly, scalar);
                let result_simd = widen_simd(&poly, scalar);
                let result_ref = poly_ref.mul_by_scalar::<CHECKED>(&scalar).unwrap();

                assert_eq!(
                    result_simd_ref.coeffs, result_simd.coeffs,
                    "Mismatch for pattern {} with scalar {}",
                    i, scalar
                );
                assert_eq!(
                    result_simd_ref.coeffs, result_ref.coeffs,
                    "Mismatch for pattern {} with scalar {} between ref and MulByScalar",
                    i, scalar
                );
            }
        }

        let coeffs: Vec<_> = [
            1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1,
            0, 1, 0,
        ]
        .into_iter()
        .map(|x| (x != 0).into())
        .collect();

        let poly32 = BinaryU64Poly::<32>::new(coeffs.clone());
        let poly32_ref = BinaryRefPoly::<32>::new(coeffs);

        for scalar in [1, 42, -7, 100, -100] {
            let result_simd_ref = widen_ref(&poly32, scalar);
            let result_simd = widen_simd(&poly32, scalar);
            let result_ref = poly32_ref.mul_by_scalar::<CHECKED>(&scalar).unwrap();
            assert_eq!(
                result_simd_ref.coeffs, result_simd.coeffs,
                "Mismatch for degree 32 with scalar {}",
                scalar
            );
            assert_eq!(
                result_simd_ref.coeffs, result_ref.coeffs,
                "Mismatch for degree 32 with scalar {} between ref and MulByScalar",
                scalar
            );
        }

        // Edge case: every coefficient zero.
        let poly_zeros = BinaryU64Poly::<16>::new([false.into(); 16]);
        let result_ref_zeros = widen_ref(&poly_zeros, 42);
        let result_simd_zeros = widen_simd(&poly_zeros, 42);
        assert_eq!(result_ref_zeros.coeffs, result_simd_zeros.coeffs);

        // Edge case: every coefficient one.
        let poly_ones = BinaryU64Poly::<16>::new([true.into(); 16]);
        let result_ref_ones = widen_ref(&poly_ones, 42);
        let result_simd_ones = widen_simd(&poly_ones, 42);
        assert_eq!(result_ref_ones.coeffs, result_simd_ones.coeffs);
    }
}