boinc/client/win/opt_x86.h

/***************
 *
 *	opt_x86.h:	x86 CPU definitions, inline functions and macros
 *	Originator:		Ben Herndon
 *
 *   to be used with seti @ home client code
 *
 */

#ifndef OPT_X86_H
#define OPT_X86_H


//---------- Constants ---------//

#if defined(__GNUC__)
// Integer aliases for GCC builds.
// NOTE(review): the widths implied by the names assume the usual GCC x86 data
// model (16-bit short, 32-bit int, 64-bit long long) - confirm before using
// this header on another target.

// Lower-case family, unsigned
typedef unsigned long long int _uint64;
typedef unsigned int	_uint32;
typedef unsigned short	_uint16;
typedef unsigned char 	_uint8;

// Lower-case family, signed
// (_int8 is plain char, whose signedness is implementation-defined)
typedef long long int	_int64;
typedef int				_int32;
typedef short 			_int16;
typedef char  			_int8;

// Upper-case family, unsigned.
// The first two entries used to re-declare _uint64/_uint32, which left
// UINT64 and UINT32 undeclared while every other upper-case name existed.
typedef unsigned long long int UINT64;
typedef unsigned int	UINT32;
typedef unsigned short	UINT16;
typedef unsigned char 	UINT8;

// Upper-case family, signed
typedef long long int	INT64;
typedef int				INT32;
typedef short 			INT16;
typedef char  			INT8;
#endif


#if defined(_MSC_VER)
// Unsigned integer aliases for Microsoft compiler builds.
// NOTE(review): unlike the GCC section of this header, no signed aliases
// (_int64 .. _int8) and no upper-case aliases (UINT16, INT32, ...) are
// declared here - presumably they are supplied by the compiler or by the
// Windows headers; confirm.
typedef unsigned long long int _uint64;
typedef unsigned int	_uint32;
typedef unsigned short	_uint16;
typedef unsigned char 	_uint8;

#endif

/*	__SSE__
	UNPCKHPS	(CAca) = (AaBb, CcDd)
	UNPCKLPS	(Dbdb) = (AaBb, CcDd)

	MOVHPS		(MmBb) = (AaBb, Mm)
	MOVLPS		(AaMm) = (AaBb, Mm)
	MOVHLPS		(AaDd) = (AaBb, CcDd)
	MOVLHPS		(CcBb) = (AaBb, CcDd)

	__3DNow__
	+PFNACC		([b-B][a-A])	= (Aa, Bb)
	+PFPNACC 	([b+B][a-A])	= (Aa, Bb)
	+PSWAPD	 	(bB)			= (Aa, Bb)

	PFACC 	 	([A+a][B+b])	= (Aa, Bb)
	PFADD 	 	([A+B][a+b])	= (Aa, Bb)
	PFMUL	 	([A*B][a*b])	= (Aa, Bb)
	PFSUB	 	([A-B][a-b])	= (Aa, Bb)
	PFSUBR	 	([B-A][b-a])	= (Aa, Bb)
	PFMAX	 	([max(A,B)][max(a,b)])	= (Aa, Bb)
	PFSUB	 	([A-B][a-b])	= (Aa, Bb)
	OPCODE	 	(xx)	= (Aa, Bb)
*/

//---------- SSE macros ---------//

//  s_get(p)  - GET 4 floats from unaligned address
//  s_put(p)	- PUT 4 floats at unaligned address
//  FILLsim(a,b,c,d) - GET 4 floats directly into register
//  s_add(a,b)	- Add 4 floats to another 4 floats, return result - Alignment or registers required
//  s_div(a,b)	- Divide 4 floats in a by those in b
//  s_max(a,b)	- Return the 4 highest floats between a and b

// && (defined(__i386__) || defined(__x86_64__))


#if defined(__3DNOW__) && defined(__GNUC__)	// GCC's versions of sse

#endif //defined(__3DNOW__)

//---------- Prototypes ---------//

//*****
//
// cycleCount - Machine specific function to get cycle counter
//
//   Returns the counter as a 64-bit unsigned value. Inline definitions for
//   GCC and for 32-bit MSVC follow further down in this header.
//
_uint64 cycleCount( void);

//
// x86_fsincos - Calculate sin & cos at same time
//
//   angle   - input angle
//   sinVal  - receives the sine
//   cosVal  - receives the cosine
//
//   NOTE(review): this prototype takes sinVal/cosVal by reference, while the
//   inline definitions later in this header take pointers (double * and
//   float *). It is therefore a separate overload with no definition in this
//   header - confirm it is defined elsewhere or still needed.
//
void x86_fsincos (double angle, double &sinVal, double &cosVal);

#if defined(__GNUC__)
//
// cycleCount (GCC) - Read the CPU time-stamp counter
//
//   CPUID is issued first to stop the instruction pipeline, then RDTSC
//   samples the counter. Returns the 64-bit count.
//
//   RDTSC delivers the count split across EDX (high half) and EAX (low half).
//   The halves are captured separately with the "a" and "d" constraints
//   because the "A" constraint only means the EDX:EAX pair on 32-bit x86;
//   on x86-64 it selects a single register and would drop half of the count.
//
__inline _uint64 cycleCount( void) {
	_uint32	leaf = 0;			// CPUID function 0, same as "xor eax,eax" in the MSVC version
	_uint32	ebxOut, ecxOut, edxOut;	// CPUID results, ignored
	_uint32	lo, hi;

		// Use CPUID to stop the instruction pipeline and get starting cycle
	asm volatile("cpuid" : "+a" (leaf), "=b" (ebxOut), "=c" (ecxOut), "=d" (edxOut));
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((((_uint64) hi) << 32) | lo);
}

// Only available on i387 FPU and above
#if (defined(__i386__) || defined(__x86_64__))
//
// x86_fsincos (GCC, double) - Calculate sin & cos with one FSINCOS
//
//   angle   - input angle, loaded into st(0)
//   sinVal  - receives the sine   (left in st(1) by FSINCOS)
//   cosVal  - receives the cosine (left in st(0) by FSINCOS)
//
//   The asm outputs are bound to *sinVal / *cosVal; binding them to the
//   pointer variables themselves would never store anything for the caller.
//   NOTE(review): FSINCOS only accepts a limited operand range (see the
//   Intel instruction reference); no range check is performed here.
//
__inline void x86_fsincos (double angle, double *sinVal, double *cosVal) {
	asm ("fsincos" : "=t" (*cosVal), "=u" (*sinVal) : "0" (angle));
}

//
// x86_fsincos (GCC, float) - Same as above, results narrowed to float
//
//   The instruction runs on double operands so that the matching input and
//   output constraints share one type; the results are converted afterwards.
//
__inline void x86_fsincos (double angle, float *sinVal, float *cosVal) {
	double	sinTmp, cosTmp;

	asm ("fsincos" : "=t" (cosTmp), "=u" (sinTmp) : "0" (angle));
	*sinVal = (float) sinTmp;
	*cosVal = (float) cosTmp;
}
#endif
#endif	// __GNUC__


#if (defined(_MSC_VER) && defined(_M_IX86))
//
// cycleCount (MSVC, 32-bit x86) - Read the CPU time-stamp counter
//
//   Runs CPUID with EAX = 0 and then RDTSC, and returns the 64-bit value
//   assembled from EAX (low dword) and EDX (high dword).
//
__inline _uint64 cycleCount( void) {
	_uint64 cycles;
		// Use CPUID to stop the instruction pipeline and get starting cycle
	__asm {
		// EBX and ECX are saved here and restored after the counter is stored
		push ebx
		push ecx
		// Select CPUID function 0
		xor	eax,eax
		cpuid
		rdtsc
		// Low half goes to bytes 0-3 of 'cycles', high half to bytes 4-7
		mov	DWORD PTR [cycles+0],eax
		mov	DWORD PTR [cycles+4],edx
		pop	ecx
		pop	ebx
	}
	return (cycles);
}

#define HAVE_SINCOS 1
//
// x86_fsincos (MSVC, double) - Calculate sin & cos with one FSINCOS
//
//   angle   - input angle, pushed onto the FPU stack
//   sinVal  - receives the sine as a 64-bit value
//   cosVal  - receives the cosine as a 64-bit value
//
__inline void x86_fsincos (double angle, double *sinVal, double *cosVal) {
	__asm	{
		fld	[angle]
		fsincos
		// First pop is stored through cosVal, second pop through sinVal
		mov	eax,cosVal
		fstp	qword ptr [eax]
		mov	eax,sinVal
		fstp	qword ptr [eax]
	}
}
//
// x86_fsincos (MSVC, float) - Calculate sin & cos with one FSINCOS
//
//   angle   - input angle, pushed onto the FPU stack
//   sinVal  - receives the sine as a 32-bit value
//   cosVal  - receives the cosine as a 32-bit value
//
__inline void x86_fsincos (double angle, float *sinVal, float *cosVal) {
	__asm	{
		fld	[angle]
		fsincos
		// First pop is stored through cosVal, second pop through sinVal;
		// the dword stores narrow each result to single precision
		mov	eax,cosVal
		fstp	dword ptr [eax]
		mov	eax,sinVal
		fstp	dword ptr [eax]
	}
}
#endif

//---------- Classes ---------//


//
// CLASS
//

//
// CACHE_INFO - Cache figures for one CPU
//
//   NOTE(review): CPU_INFO_t::cacheL1()/cacheL2() multiply L1 and L2 by 1024
//   before returning them, so those two are presumably stored in KB; the
//   units of the remaining fields are not visible here - confirm.
//
typedef struct {
	int		L1;
	int		L2;
	int		L3;
	int		code;
	int		data;
	int		line_size;
} CACHE_INFO;

//
// CPU_INFO_t - CPU identification, feature flags and cache details
//
//   Only the declaration lives here; the constructor and the private helpers
//   are defined elsewhere. All data members are public.
//
class CPU_INFO_t {
public:
	CPU_INFO_t();
	~CPU_INFO_t() {}

	// L1 / L2 cache figures scaled by 1024
	int		cacheL1()
	{
		return cache.L1 * 1024;
	}
	int		cacheL2()
	{
		return cache.L2 * 1024;
	}
	// Cache line size exactly as stored
	int		cacheLine()
	{
		return cache.line_size;
	}
	// Direct, writable access to the stored cache record
	CACHE_INFO &	getCache()
	{
		return cache;
	}

	// Feature flags
	bool	has_cpuid_opcode;
	bool	has_mmx;
	bool	has_mmxPlus;
	bool	has_3Dnow;
	bool	has_3DnowPlus;
	bool	has_sse;
	bool	has_sseOS;
	bool	has_sse2;

	// Numeric identification fields
	struct {
		int		vendor;
		_uint32	max_stdFunc;
		_uint32	max_extFunc;
		int		family;		// 3 - 386 family, 4 - i486 family, 5 - Pentium family, 6 - Pentium Pro family
		int		model;
		int		rev;
		int		type;
		int		extFamily;
		int		extModel;
		int		brandID;
	} cpu;

	CACHE_INFO	cache;
	_uint32		cpuMhz;
	_uint32		busMhz;

	// Vendor identification: the id bytes can be viewed as three 32-bit
	// words or as characters, plus two descriptive strings
	struct {
		union {
			_uint32	longs[3];
			char	name[16];
		} id;
		char	*abrvName;
		char	*company;
	} vendor;

	// Processor name in several forms
	struct {
		char	fromID[64];
		char	*normal;
		char	*abrv;
		char	*nick;
	} name;

private:
	void	checkCpuidOpcode();		// Use generic code to detect w/o causing bad opcode
	void	getCacheSizes();
	bool	sseOSsupport();
	void	cpuidLookup();
	void	cpuidName();
};


#endif  // #ifndef OPT_X86_H