|
Shinya Kitaoka |
810553 |
#pragma once
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
#ifndef OPTIMIZE_FOR_LP64_INCLUDED
|
|
Toshihiro Shimizu |
890ddd |
#define OPTIMIZE_FOR_LP64_INCLUDED
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
*****************************************************************************
|
|
Toshihiro Shimizu |
890ddd |
* OSSERVAZIONI *
|
|
Toshihiro Shimizu |
890ddd |
*****************************************************************************
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
____________OSS 1:___________________________________________________________
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
se devo fare DUE MOLTIPLICAZIONI 13 bit * 8 bit posso farle in un
|
|
Toshihiro Shimizu |
890ddd |
colpo solo, ad esempio:
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
siano X = xxxxxxxxxxxxx
|
|
Toshihiro Shimizu |
890ddd |
S = ssssssss
|
|
Toshihiro Shimizu |
890ddd |
Y = yyyyyyyyyyyyy
|
|
Toshihiro Shimizu |
890ddd |
T = tttttttt
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
e devo calcolare
|
|
Toshihiro Shimizu |
890ddd |
U = X * S
|
|
Toshihiro Shimizu |
890ddd |
V = Y * T
|
|
Toshihiro Shimizu |
890ddd |
posso farlo in un colpo solo impacchettando i bit cosi':
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
A = X 0 00000000 Y = xxxxxxxxxxxxx 0 00000000 yyyyyyyyyyyyy
|
|
Toshihiro Shimizu |
890ddd |
B = 00000 S 0 00000000 00000 T = 00000ssssssss 0 00000000 00000tttttttt
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
ora se faccio C = A * B si ha
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
C = U ?????????????????????? V =
|
|
Toshihiro Shimizu |
890ddd |
= uuuuuuuuuuuuuuuuuuuuu ?????????????????????? vvvvvvvvvvvvvvvvvvvvv
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
dove C e' di 64 bit; cioe' i primi 21 bit sono X * S = U
|
|
Toshihiro Shimizu |
890ddd |
e gli ultimi 21 sono Y * T = V
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
____________OSS 2:___________________________________________________________
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
se devo fare DUE MOLTIPLICAZIONI 16 bit * 16 bit del tipo
|
|
Toshihiro Shimizu |
890ddd |
X * S = U
|
|
Toshihiro Shimizu |
890ddd |
Y * S = V
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
con
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
#X = 16,
|
|
Toshihiro Shimizu |
890ddd |
#Y = 16,
|
|
Toshihiro Shimizu |
890ddd |
#S = 16
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
(dove l'operatore '#' da' come risultato il numero di bit di cui e' composto
|
|
Toshihiro Shimizu |
890ddd |
un numero intero)
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
posso farle tutte e due in un solo colpo impacchettando i bit cosi':
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
O = 0000000000000000, #O = 16
|
|
Toshihiro Shimizu |
890ddd |
A = X O Y , #A = 48
|
|
Toshihiro Shimizu |
890ddd |
B = S , #B = 16
|
|
Toshihiro Shimizu |
890ddd |
C = A * B , #C = 64
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
dove i primi 32 bit sono X * S e i secondi 32 bit sono Y * S
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
____________OSS 3:___________________________________________________________
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
se devo fare QUATTRO MOLTIPLICAZIONI 8 bit * 8 bit del tipo
|
|
Toshihiro Shimizu |
890ddd |
X * S = I #X = 8, #S = 8, #I = 16
|
|
Toshihiro Shimizu |
890ddd |
Y * S = J #Y = 8, #S = 8, #J = 16
|
|
Toshihiro Shimizu |
890ddd |
Z * S = K #Z = 8, #S = 8, #K = 16
|
|
Toshihiro Shimizu |
890ddd |
W * S = L #W = 8, #S = 8, #L = 16
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
posso farle tutte e quattro in un solo colpo impacchettando i bit cosi':
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
O = 00000000 #O = 8
|
|
Toshihiro Shimizu |
890ddd |
C = XOYOZOW * OOOOOOS #C = 64
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
dove
|
|
Toshihiro Shimizu |
890ddd |
I sono i primi 16 bit,
|
|
Toshihiro Shimizu |
890ddd |
J sono i secondi 16 bit,
|
|
Toshihiro Shimizu |
890ddd |
K sono i terzi 16 bit,
|
|
Toshihiro Shimizu |
890ddd |
L i quarti 16 bit
|
|
Toshihiro Shimizu |
890ddd |
_____________________________________________________________________________
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
*****************************************************************************
|
|
Toshihiro Shimizu |
890ddd |
*/
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* Feature flag, defined unconditionally whenever this header is included.
 * NOTE(review): presumably tested by including code to select the packed
 * 64-bit code paths described above -- confirm at the use sites. */
#define OPTIMIZE_FOR_LP64
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Three values packed into one 64-bit word, each in a 17-bit lane:
 *   first  -> bits 34..50
 *   second -> bits 17..33
 *   third  -> bits  0..16
 * This is the layout produced by ADD_3_X_16BIT / add_3_x_16bit.
 */
#define MASK_FIRST_OF_3_X_16BIT 0x7FFFC00000000
#define MASK_SECOND_OF_3_X_16BIT 0x3FFFE0000
#define MASK_THIRD_OF_3_X_16BIT 0x1FFFF

/*
 * Lane extractors. Every expansion is fully parenthesized and carries no
 * trailing semicolon, so the macros can be used inside larger expressions
 * (e.g. FIRST_OF_3_X_16BIT(x) + 1) without precedence surprises.
 *
 * FIRST_OF_3_X_16BIT only shifts: it does not apply
 * MASK_FIRST_OF_3_X_16BIT, so any bits of x above bit 50 are returned too.
 */
#define FIRST_OF_3_X_16BIT(x) ((x) >> 34)
#define SECOND_OF_3_X_16BIT(x) (((x) & MASK_SECOND_OF_3_X_16BIT) >> 17)
#define THIRD_OF_3_X_16BIT(x) ((x) & MASK_THIRD_OF_3_X_16BIT)
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Two values packed into one 64-bit word, each in a 25-bit lane:
 *   first  -> bits 25..49
 *   second -> bits  0..24
 * This is the layout produced by ADD_2_X_24BIT / add_2_x_24bit.
 */
#define MASK_FIRST_OF_2_X_24BIT 0x3FFFFFE000000
#define MASK_SECOND_OF_2_X_24BIT 0x1FFFFFF

/*
 * Lane extractors, fully parenthesized so they compose safely with
 * neighbouring operators. FIRST_OF_2_X_24BIT only shifts (no mask), so bits
 * of x above bit 49 are returned as well.
 */
#define FIRST_OF_2_X_24BIT(x) ((x) >> 25)
#define SECOND_OF_2_X_24BIT(x) ((x) & MASK_SECOND_OF_2_X_24BIT)
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Two 32-bit values packed into one 64-bit word:
 *   first  -> bits 32..63
 *   second -> bits  0..31
 * This is the layout produced by MULT_2_X_16BIT / mult_2_x_16bit.
 */
#define MASK_FIRST_OF_2_X_32BIT 0xFFFFFFFF00000000
#define MASK_SECOND_OF_2_X_32BIT 0xFFFFFFFF

/*
 * Half extractors, fully parenthesized so they compose safely with
 * neighbouring operators (e.g. FIRST_OF_2_X_32BIT(x) + 1).
 */
#define FIRST_OF_2_X_32BIT(x) ((x) >> 32)
#define SECOND_OF_2_X_32BIT(x) ((x) & MASK_SECOND_OF_2_X_32BIT)
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned char; not referenced by the code visible in this
 * header. */
typedef unsigned char UINT8;
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned short; operand type of add_3_x_16bit and
 * mult_2_x_16bit. */
typedef unsigned short UINT16;
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned int; operand type of add_2_x_24bit. The "24" in
 * the name is a convention only -- nothing here restricts values to 24 bits. */
typedef unsigned int UINT24;
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned int (same underlying type as UINT24). */
typedef unsigned int UINT32;
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned long; return type of add_2_x_24bit (two 25-bit
 * lanes). The "50" is a naming convention, not an enforced width. */
typedef unsigned long UINT50;
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned long; not referenced by the code visible in this
 * header. NOTE(review): presumably meant for the 3 x 17-bit packed word
 * (51 bits) -- confirm. */
typedef unsigned long UINT51;
|
|
Toshihiro Shimizu |
890ddd |
/* Plain alias of unsigned long; carrier type for every packed word in this
 * header. NOTE(review): this is 64 bits wide only where unsigned long is
 * (the LP64 model the header name refers to) -- confirm for other targets. */
typedef unsigned long UINT64;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
#if 0
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* computes a1+b1, a2+b2, a3+b3 in a single operation */
|
|
Toshihiro Shimizu |
890ddd |
UINT64 add_3_x_16bit ( UINT16 a1, UINT16 a2, UINT16 a3,
|
|
Toshihiro Shimizu |
890ddd |
UINT16 b1, UINT16 b2, UINT16 b3 );
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* esegue a1+b1, a2+b2 in un'unica operazione */
|
|
Toshihiro Shimizu |
890ddd |
UINT50 add_2_x_24bit ( UINT24 a1, UINT24 a2,
|
|
Toshihiro Shimizu |
890ddd |
UINT24 b1, UINT24 b2 );
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* esegue a1*b, a2*b in un'unica operazione */
|
|
Toshihiro Shimizu |
890ddd |
UINT64 mult_2_x_16bit ( UINT16 a1, UINT16 a2,
|
|
Toshihiro Shimizu |
890ddd |
UINT16 b );
|
|
Toshihiro Shimizu |
890ddd |
#endif
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ========================================================================= */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ------------------------------------------------------------------------- */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Computes a1+b1, a2+b2 and a3+b3 with a single 64-bit addition.
 *
 * Each triple is packed into one UINT64 (first value at bit 34, second at
 * bit 17, third at bit 0) and the two packed words are added. As long as
 * every argument fits in 16 bits, each sum fits in its 17-bit lane and no
 * carry leaks into the neighbouring lane. Unpack the result with
 * FIRST_/SECOND_/THIRD_OF_3_X_16BIT.
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators; each argument is evaluated exactly once.
 */
#define ADD_3_X_16BIT(a1, a2, a3, b1, b2, b3)                                  \
  ((((UINT64)(a1) << 34) | ((UINT64)(a2) << 17) | (UINT64)(a3)) +              \
   (((UINT64)(b1) << 34) | ((UINT64)(b2) << 17) | (UINT64)(b3)))
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Function form of ADD_3_X_16BIT: computes a1+b1, a2+b2 and a3+b3 with a
 * single 64-bit addition.
 *
 * Each triple is packed into one UINT64 (first value at bit 34, second at
 * bit 17, third at bit 0) and the two packed words are added. Returns the
 * packed word holding the three sums, one per 17-bit lane; unpack it with
 * FIRST_/SECOND_/THIRD_OF_3_X_16BIT.
 *
 * Declared static inline: under C99/C11 rules a plain `inline` definition in
 * a header provides no external definition, so a call that is not inlined
 * would fail to link.
 */
static inline UINT64 add_3_x_16bit(UINT16 a1, UINT16 a2, UINT16 a3,
                                   UINT16 b1, UINT16 b2, UINT16 b3)
{
  const UINT64 packed_a =
      ((UINT64)a1 << 34) | ((UINT64)a2 << 17) | (UINT64)a3;
  const UINT64 packed_b =
      ((UINT64)b1 << 34) | ((UINT64)b2 << 17) | (UINT64)b3;

  return packed_a + packed_b;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ------------------------------------------------------------------------- */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Computes a1+b1 and a2+b2 with a single 64-bit addition.
 *
 * Each pair is packed into one UINT64 (first value at bit 25, second at
 * bit 0) and the two packed words are added. As long as every argument fits
 * in 24 bits, each sum fits in its 25-bit lane. Unpack the result with
 * FIRST_/SECOND_OF_2_X_24BIT.
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators; each argument is evaluated exactly once.
 */
#define ADD_2_X_24BIT(a1, a2, b1, b2)                                          \
  ((((UINT64)(a1) << 25) | (UINT64)(a2)) +                                     \
   (((UINT64)(b1) << 25) | (UINT64)(b2)))
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Function form of ADD_2_X_24BIT: computes a1+b1 and a2+b2 with a single
 * 64-bit addition.
 *
 * Each pair is packed into one word (first value at bit 25, second at
 * bit 0) and the two packed words are added. Returns the packed word holding
 * both sums, one per 25-bit lane; unpack it with FIRST_/SECOND_OF_2_X_24BIT.
 *
 * Declared static inline: under C99/C11 rules a plain `inline` definition in
 * a header provides no external definition, so a call that is not inlined
 * would fail to link.
 */
static inline UINT50 add_2_x_24bit(UINT24 a1, UINT24 a2,
                                   UINT24 b1, UINT24 b2)
{
  const UINT64 packed_a = ((UINT64)a1 << 25) | (UINT64)a2;
  const UINT64 packed_b = ((UINT64)b1 << 25) | (UINT64)b2;

  return packed_a + packed_b;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/* ------------------------------------------------------------------------- */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Computes a1*b and a2*b with a single 64-bit multiplication.
 *
 * a1 is placed at bit 32 and a2 at bit 0 of one UINT64, which is then
 * multiplied by b. As long as all three arguments fit in 16 bits, a1*b ends
 * up in the upper 32 bits and a2*b in the lower 32 bits; unpack with
 * FIRST_/SECOND_OF_2_X_32BIT.
 *
 * Every parameter and the whole expansion are parenthesized, so compound
 * arguments (e.g. a ternary) and surrounding operators behave as expected.
 */
#define MULT_2_X_16BIT(a1, a2, b)                                              \
  ((UINT64)(b) * (((UINT64)(a1) << 32) | (UINT64)(a2)))
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
/*
 * Function form of MULT_2_X_16BIT: computes a1*b and a2*b with a single
 * 64-bit multiplication.
 *
 * a1 is placed at bit 32 and a2 at bit 0 of one UINT64, which is then
 * multiplied by b. Returns the packed word: a1*b in the upper 32 bits and
 * a2*b in the lower 32 bits (each product of two 16-bit values fits in 32
 * bits); unpack it with FIRST_/SECOND_OF_2_X_32BIT.
 *
 * Declared static inline: under C99/C11 rules a plain `inline` definition in
 * a header provides no external definition, so a call that is not inlined
 * would fail to link.
 */
static inline UINT64 mult_2_x_16bit(UINT16 a1, UINT16 a2,
                                    UINT16 b)
{
  const UINT64 packed = ((UINT64)a1 << 32) | (UINT64)a2;

  return packed * b;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
#endif
|