Shinya Kitaoka 810553
#pragma once
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
#ifndef OPTIMIZE_FOR_LP64_INCLUDED
Toshihiro Shimizu 890ddd
#define OPTIMIZE_FOR_LP64_INCLUDED
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/*
  *****************************************************************************
  *  REMARKS                                                                  *
  *****************************************************************************

  ____________REMARK 1:________________________________________________________

  if I have to perform TWO MULTIPLICATIONS of 13 bit * 8 bit, I can do them
  in a single step, for example:

  let X = xxxxxxxxxxxxx
  S = ssssssss
  Y = yyyyyyyyyyyyy
  T = tttttttt

  and I have to compute
  U = X * S
  V = Y * T
  I can do it in a single step by packing the bits like this:

  A = X       0 00000000 Y       = xxxxxxxxxxxxx 0 00000000 yyyyyyyyyyyyy
  B = 00000 S 0 00000000 00000 T = 00000ssssssss 0 00000000 00000tttttttt

  now, computing C = A * B gives

  C = U ?????????????????????? V =
  = uuuuuuuuuuuuuuuuuuuuu ?????????????????????? vvvvvvvvvvvvvvvvvvvvv

  where C is 64 bits wide; that is, the first 21 bits are X * S = U
  and the last 21 bits are Y * T = V

  ____________REMARK 2:________________________________________________________

  if I have to perform TWO MULTIPLICATIONS of 16 bit * 16 bit of the form
  X * S = U
  Y * S = V

  with

  #X = 16,
  #Y = 16,
  #S = 16

  (where the '#' operator yields the number of bits an integer is made of)

  I can do both of them in a single step by packing the bits like this:

  O = 0000000000000000,   #O = 16
  A = X O Y           ,   #A = 48
  B = S               ,   #B = 16
  C = A * B           ,   #C = 64

  where the first 32 bits are X * S and the second 32 bits are Y * S

  ____________REMARK 3:________________________________________________________

  if I have to perform FOUR MULTIPLICATIONS of 8 bit * 8 bit of the form
  X * S = I           #X = 8, #S = 8, #I = 16
  Y * S = J           #Y = 8, #S = 8, #J = 16
  Z * S = K           #Z = 8, #S = 8, #K = 16
  W * S = L           #W = 8, #S = 8, #L = 16

  I can do all four of them in a single step by packing the bits like this:

  O = 00000000             #O = 8
  C = XOYOZOW * OOOOOOS    #C = 64

  where
  I is the first 16 bits,
  J is the second 16 bits,
  K is the third 16 bits,
  L is the fourth 16 bits
  _____________________________________________________________________________

  *****************************************************************************
  */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* Marker macro: defined (with an empty expansion) whenever this header has
   been included.
   NOTE(review): presumably tested with #ifdef by including code to select
   the packed 64-bit code paths -- confirm against the includers. */
#define OPTIMIZE_FOR_LP64
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/*
 * Three values packed into one 64-bit word, each in its own 17-bit lane
 * (16 data bits plus one spare bit):
 *
 *   first  lane: bits 50..34
 *   second lane: bits 33..17
 *   third  lane: bits 16..0
 */
#define MASK_FIRST_OF_3_X_16BIT 0x7FFFC00000000
#define MASK_SECOND_OF_3_X_16BIT 0x3FFFE0000
#define MASK_THIRD_OF_3_X_16BIT 0x1FFFF

/*
 * Lane extractors. Every expansion is a single parenthesised expression
 * without a trailing semicolon, so the macros can be used safely as operands
 * of a larger expression.
 *
 * FIRST_OF_3_X_16BIT only shifts: it returns everything stored at bit 34 and
 * above, it does not apply MASK_FIRST_OF_3_X_16BIT.
 */
#define FIRST_OF_3_X_16BIT(x) ((x) >> 34)
#define SECOND_OF_3_X_16BIT(x) (((x) & MASK_SECOND_OF_3_X_16BIT) >> 17)
#define THIRD_OF_3_X_16BIT(x) ((x) & MASK_THIRD_OF_3_X_16BIT)
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/*
 * Two values packed into one 64-bit word, each in its own 25-bit lane
 * (24 data bits plus one spare bit):
 *
 *   first  lane: bits 49..25
 *   second lane: bits 24..0
 */
#define MASK_FIRST_OF_2_X_24BIT 0x3FFFFFE000000
#define MASK_SECOND_OF_2_X_24BIT 0x1FFFFFF

/*
 * Lane extractors, fully parenthesised so that they can be combined with
 * other operators. FIRST_OF_2_X_24BIT only shifts: it returns everything
 * stored at bit 25 and above without applying MASK_FIRST_OF_2_X_24BIT.
 */
#define FIRST_OF_2_X_24BIT(x) ((x) >> 25)
#define SECOND_OF_2_X_24BIT(x) ((x) & MASK_SECOND_OF_2_X_24BIT)
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/*
 * Two 32-bit values packed into one 64-bit word:
 *
 *   first  value: bits 63..32
 *   second value: bits 31..0
 */
#define MASK_FIRST_OF_2_X_32BIT 0xFFFFFFFF00000000
#define MASK_SECOND_OF_2_X_32BIT 0xFFFFFFFF

/*
 * Half extractors, fully parenthesised so that they can be combined with
 * other operators without precedence surprises.
 */
#define FIRST_OF_2_X_32BIT(x) ((x) >> 32)
#define SECOND_OF_2_X_32BIT(x) ((x) & MASK_SECOND_OF_2_X_32BIT)
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/*
 * Unsigned integer aliases named after the number of bits the packed
 * arithmetic in this header stores in them. The names are nominal: each alias
 * is simply the smallest built-in unsigned type used to hold that many bits.
 *
 * NOTE: UINT50, UINT51 and UINT64 are all `unsigned long`, which is only
 * 64 bits wide on LP64 data models; the shifts by 25, 32 and 34 performed on
 * these types elsewhere in this header rely on that width.
 */
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT24; /* 24-bit value held in a plain unsigned int */
typedef unsigned int UINT32;
typedef unsigned long UINT50; /* two 25-bit lanes */
typedef unsigned long UINT51; /* three 17-bit lanes */
typedef unsigned long UINT64;
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
#if 0
/* Disabled reference prototypes of the helpers defined further down in this
   header; kept only as a summary of the interface. */

/* computes a1+b1, a2+b2, a3+b3 in a single operation */
UINT64  add_3_x_16bit ( UINT16 a1, UINT16 a2, UINT16 a3,
			UINT16 b1, UINT16 b2, UINT16 b3 );

/* computes a1+b1, a2+b2 in a single operation */
UINT50 add_2_x_24bit ( UINT24 a1, UINT24 a2,
		       UINT24 b1, UINT24 b2 );

/* computes a1*b, a2*b in a single operation */
UINT64 mult_2_x_16bit ( UINT16 a1, UINT16 a2,
			UINT16 b );
#endif
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ========================================================================= */
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ------------------------------------------------------------------------- */
Toshihiro Shimizu 890ddd
Shinya Kitaoka 120a6e
/*
 * Macro form of add_3_x_16bit(): packs (a1, a2, a3) and (b1, b2, b3) into
 * 17-bit lanes at bit offsets 34, 17 and 0 and adds the two packed words, so
 * that a1+b1, a2+b2 and a3+b3 are produced by one addition.
 *
 * The whole expansion is parenthesised so that the result can be used as an
 * operand (e.g. shifted, masked or multiplied) without the outer operator
 * binding to only one of the two packed words.
 * Each argument is evaluated exactly once. Arguments are expected to fit in
 * 16 bits; wider values spill into the neighbouring lane.
 */
#define ADD_3_X_16BIT(a1, a2, a3, b1, b2, b3)                                  \
  (((UINT64)(a1) << 34 | (UINT64)(a2) << 17 | (a3)) +                          \
   ((UINT64)(b1) << 34 | (UINT64)(b2) << 17 | (b3)))
Toshihiro Shimizu 890ddd
Shinya Kitaoka 120a6e
/*
 * Computes a1+b1, a2+b2 and a3+b3 with a single 64-bit addition.
 *
 * Each triple is packed into one word using 17-bit lanes (bit offsets 34, 17
 * and 0); the spare 17th bit of every lane receives the carry of the 16-bit
 * sum, so the three sums cannot disturb each other.
 *
 * Returns the packed sums: a1+b1 from bit 34 upwards, a2+b2 in bits 33..17
 * and a3+b3 in bits 16..0. Relies on UINT64 being at least 51 bits wide.
 *
 * Declared `static inline` so that every translation unit including this
 * header gets its own definition; a plain `inline` definition in C99/C11
 * provides no external definition and fails to link whenever the call is not
 * actually inlined.
 */
static inline UINT64 add_3_x_16bit(UINT16 a1, UINT16 a2, UINT16 a3, UINT16 b1,
                                   UINT16 b2, UINT16 b3) {
  const UINT64 packed_a = (UINT64)a1 << 34 | (UINT64)a2 << 17 | a3;
  const UINT64 packed_b = (UINT64)b1 << 34 | (UINT64)b2 << 17 | b3;

  return packed_a + packed_b;
}
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ------------------------------------------------------------------------- */
Toshihiro Shimizu 890ddd
Shinya Kitaoka 120a6e
/*
 * Macro form of add_2_x_24bit(): packs (a1, a2) and (b1, b2) into 25-bit
 * lanes at bit offsets 25 and 0 and adds the two packed words, so that a1+b1
 * and a2+b2 are produced by one addition.
 *
 * The whole expansion is parenthesised so that the result can be used as an
 * operand of a larger expression. Each argument is evaluated exactly once.
 * Arguments are expected to fit in 24 bits; wider values spill into the
 * neighbouring lane.
 */
#define ADD_2_X_24BIT(a1, a2, b1, b2)                                          \
  (((UINT64)(a1) << 25 | (a2)) + ((UINT64)(b1) << 25 | (b2)))
Toshihiro Shimizu 890ddd
Shinya Kitaoka 120a6e
/*
 * Computes a1+b1 and a2+b2 with a single 64-bit addition.
 *
 * Each pair is packed into one word using 25-bit lanes (bit offsets 25 and
 * 0); the spare 25th bit of the low lane receives the carry of the 24-bit
 * sum. UINT24 is a plain unsigned int, so the 24-bit range of the arguments
 * is not enforced: a2 or b2 values needing more than 24 bits can carry into
 * the upper lane.
 *
 * Returns the packed sums: a1+b1 from bit 25 upwards and a2+b2 in bits 24..0.
 *
 * Declared `static inline` so that every translation unit including this
 * header gets its own definition; a plain `inline` definition in C99/C11
 * provides no external definition and fails to link whenever the call is not
 * actually inlined.
 */
static inline UINT50 add_2_x_24bit(UINT24 a1, UINT24 a2, UINT24 b1,
                                   UINT24 b2) {
  const UINT64 packed_a = (UINT64)a1 << 25 | a2;
  const UINT64 packed_b = (UINT64)b1 << 25 | b2;

  return packed_a + packed_b;
}
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
/* ------------------------------------------------------------------------- */
Toshihiro Shimizu 890ddd
Shinya Kitaoka 120a6e
/*
 * Macro form of mult_2_x_16bit(): packs a1 into bits 63..32 and a2 into bits
 * 31..0 of one word and multiplies it by b, so that a1*b and a2*b are
 * produced by one multiplication.
 *
 * Every argument is parenthesised before being cast, and the whole expansion
 * is parenthesised, so that compound argument expressions and surrounding
 * operators cannot change the grouping. Each argument is evaluated exactly
 * once. Arguments are expected to fit in 16 bits.
 */
#define MULT_2_X_16BIT(a1, a2, b)                                              \
  ((UINT64)(b) * (((UINT64)(a1) << 32) | (UINT64)(a2)))
Toshihiro Shimizu 890ddd
Shinya Kitaoka 120a6e
/*
 * Computes a1*b and a2*b with a single 64-bit multiplication.
 *
 * a1 is placed in the upper 32 bits and a2 in the lower 32 bits of one word,
 * which is then multiplied by b. A 16-bit by 16-bit product fits in 32 bits,
 * so the low product cannot carry into the high one.
 *
 * Returns the packed products: a1*b in bits 63..32 and a2*b in bits 31..0.
 * Relies on UINT64 being 64 bits wide.
 *
 * Declared `static inline` so that every translation unit including this
 * header gets its own definition; a plain `inline` definition in C99/C11
 * provides no external definition and fails to link whenever the call is not
 * actually inlined.
 */
static inline UINT64 mult_2_x_16bit(UINT16 a1, UINT16 a2, UINT16 b) {
  const UINT64 packed = (UINT64)a1 << 32 | a2;

  return packed * b;
}
Toshihiro Shimizu 890ddd
Toshihiro Shimizu 890ddd
#endif