顯示包含「code snip」標籤的文章。顯示所有文章
顯示包含「code snip」標籤的文章。顯示所有文章

2009年8月5日星期三

Restrict 指針

一般時候我們的編譯器都不知道兩個指針所指的位置是否相同或者有所重疊,大大降低了它的優化效果。就拿 Matrix3 * Vector3 為例:

// Multiplies this 3x3 matrix (elements m00..m22) by the vector v and stores
// the product in result, one matrix row per output component.
// v and result are plain references, so the compiler has to assume they may
// refer to the same object: every store to result may change v.
void Matrix::MulVector(const Vec3& v, Vec3& result) {
result.x = m00 * v.x + m01 * v.y + m02 * v.z;
// v.x, v.y and v.z are read from v again here, after result.x was written.
result.y = m10 * v.x + m11 * v.y + m12 * v.z;
// ...and once more here, after result.y was written.
result.z = m20 * v.x + m21 * v.y + m22 * v.z;
}

很簡單的三行代碼,然而它隱藏了一個效能上的問題。當 result 被賦予了新的值後,編譯器認為 v 的值也可能被更改了 (v 和 result 都是 reference,屬於 pointer 的一類)。因此原本可以留在 register 裡的 v.x, v.y 和 v.z 被迫重新由記憶體裡讀取回來。看看 VC2008 編譯成的機器碼吧:

result.x = m00 * v.x + m01 * v.y + m02 * v.z;
mov eax,dword ptr [esp+4]
movss xmm0,dword ptr [ecx+8]
mulss xmm0,dword ptr [eax+8]
movss xmm1,dword ptr [ecx+4]
mulss xmm1,dword ptr [eax+4]
mov edx,dword ptr [esp+8]
addss xmm0,xmm1
movss xmm1,dword ptr [eax]
mulss xmm1,dword ptr [ecx]
addss xmm0,xmm1
movss dword ptr [edx],xmm0

result.y = m10 * v.x + m11 * v.y + m12 * v.z;
movss xmm0,dword ptr [ecx+0Ch]
mulss xmm0,dword ptr [eax]
movss xmm1,dword ptr [ecx+14h]
mulss xmm1,dword ptr [eax+8]
addss xmm0,xmm1
movss xmm1,dword ptr [ecx+10h]
mulss xmm1,dword ptr [eax+4]
addss xmm0,xmm1
movss dword ptr [edx+4],xmm0

result.z = m20 * v.x + m21 * v.y + m22 * v.z;
movss xmm0,dword ptr [ecx+18h]
mulss xmm0,dword ptr [eax]
movss xmm1,dword ptr [ecx+20h]
mulss xmm1,dword ptr [eax+8]
addss xmm0,xmm1
movss xmm1,dword ptr [ecx+1Ch]
mulss xmm1,dword ptr [eax+4]
addss xmm0,xmm1
movss dword ptr [edx+8],xmm0

這時候使用 restrict 就可幫上編譯器吧。請注意,VC2008 的 __restrict 只對指針生效:

// Multiplies this 3x3 matrix (elements m00..m22) by the vector v_ and stores
// the product in result_.
//
// The references are rebound to __restrict pointers, which promises the
// compiler that the input and the output do not overlap, so the components of
// the input can stay in registers across the three stores.
// The caller must therefore NOT pass the same (or an overlapping) object as
// both v_ and result_; doing so breaks the promise made by __restrict.
void Matrix::MulVector(const Vec3& v_, Vec3& result_) {
    // __restrict is applied to pointers here, hence the local pointer copies.
    const Vec3* __restrict v = &v_;
    Vec3* __restrict result = &result_;
    result->x = m00 * v->x + m01 * v->y + m02 * v->z;
    result->y = m10 * v->x + m11 * v->y + m12 * v->z;
    result->z = m20 * v->x + m21 * v->y + m22 * v->z;
}

重新編譯後的機器碼:

result->x = m00 * v->x + m01 * v->y + m02 * v->z;
mov eax,dword ptr [esp+4]
movss xmm1,dword ptr [eax+4]
movss xmm0,dword ptr [eax+8]
movss xmm2,dword ptr [eax]
movss xmm3,dword ptr [ecx+4]
movss xmm4,dword ptr [ecx+8]
mov eax,dword ptr [esp+8]
mulss xmm3,xmm1
mulss xmm4,xmm0
addss xmm3,xmm4
movaps xmm4,xmm2
mulss xmm4,dword ptr [ecx]
addss xmm3,xmm4

result->y = m10 * v->x + m11 * v->y + m12 * v->z;
movss xmm4,dword ptr [ecx+10h]
movss dword ptr [eax],xmm3
movss xmm3,dword ptr [ecx+0Ch]
mulss xmm3,xmm2
mulss xmm4,xmm1
addss xmm3,xmm4
movss xmm4,dword ptr [ecx+14h]
mulss xmm4,xmm0
addss xmm3,xmm4
movss dword ptr [eax+4],xmm3

result->z = m20 * v->x + m21 * v->y + m22 * v->z;
movss xmm3,dword ptr [ecx+18h]
mulss xmm3,xmm2
movss xmm2,dword ptr [ecx+1Ch]
mulss xmm2,xmm1
movss xmm1,dword ptr [ecx+20h]
addss xmm3,xmm2
mulss xmm1,xmm0
addss xmm3,xmm1
movss dword ptr [eax+8],xmm3

可以看到記憶體讀取操作減少了。

到最後,其實使用局部變量也可達到類似效果:

// Multiplies this 3x3 matrix (elements m00..m22) by the vector v and stores
// the product in result.
// The components of v are copied into locals before anything is written to
// result, so later stores to result cannot affect the values used below.
void Matrix::MulVector(const Vec3& v, Vec3& result) {
// Read all inputs up front, before the first store to result.
const float x = v.x;
const float y = v.y;
const float z = v.z;

result.x = m00 * x + m01 * y + m02 * z;
result.y = m10 * x + m11 * y + m12 * z;
result.z = m20 * x + m21 * y + m22 * z;
}

2009年1月13日星期二

檔案監視器

看過了猴子靈藥的 "Database Hot Loader" 後,心動之下又想做些類似的東西,那就是一個用來監視檔案系統的小工具。有了它,遊戲裡的任何素材(美術素材,以及音效、字型、腳本程序等等)檔案一經修改就會立即在遊戲裡反映出來,因而省去重新啟動遊戲程式的煩厭。當然背後還需健全的資源系統才能成事。

在視窗環境中,標準的方法是調用 FindFirstChangeNotification 或 ReadDirectoryChangesW;前者告訴你某個資料夾有否被更改,後者還會告訴你甚麼檔案/資料夾曾被更改。怎知 ReadDirectoryChangesW 的調用殊不簡單,MSDN 又沒有範例,上 CodeProject 碰碰運氣得來的是一個 3000 多行代碼的類別,Google 一番還是找不到想要的。

經過一番努力和嘗試(我相信 MSDN 是有錯漏的),得知 GetOverlappedResult 和 ReadDirectoryChangesW 的配合是最簡單的;無須和任何多執行緒有關的東西打交道。我的 FileMonitor 就只有 constructor 和 getChangedFile 這兩個函數。

FileMonitor.h

// Include guard. The name deliberately avoids double underscores, because
// identifiers containing "__" are reserved for the C++ implementation.
#ifndef FILEMONITOR_H
#define FILEMONITOR_H

#include <string>

/*! Monitors file changes under a particular folder.
    The implementation uses the win32 ReadDirectoryChangesW() function
    together with GetOverlappedResult() to perform the monitoring, therefore
    no thread is created, which keeps the interface very simple.

    ReadDirectoryChangesW() uses a fixed-size buffer to hold the information
    gathered between two calls of getChangedFile(), so FileMonitor may fail to
    detect some file changes if there are too many of them, or if the file
    names are too long to fit into the buffer. To reduce the chance of this
    happening, call getChangedFile() frequently.

    \sa http://mtlung.blogspot.com/2009/01/blog-post.html

    Example:
    \code
    FileMonitor monitor(L"pathToMonitor", true);
    // In your main loop:
    while(true) {
        std::wstring path;
        while(!(path = monitor.getChangedFile()).empty()) {
            std::wcout << path << std::endl;
        }
    }
    \endcode
 */
class FileMonitor
{
public:
    /*! Constructor
        \param path The path to monitor
        \param recursive Watch the path recursively
        \param operationTowatch Which file operations to monitor,
            having the same meaning as the dwNotifyFilter parameter of the
            ReadDirectoryChangesW() function, see
            http://msdn.microsoft.com/en-us/library/aa365465(VS.85).aspx
            for more details. Its default value is -1, which has the same
            meaning as FILE_NOTIFY_CHANGE_LAST_WRITE.
     */
    FileMonitor(const wchar_t* path, bool recursive, int operationTowatch = -1);

    ~FileMonitor();

    /*! Gets the next file under the watched directory that has changed.
        This function is non-blocking; if there is no change in the file
        system it simply returns an empty string.

        \note
        The current implementation uses a fixed-size buffer to capture all
        the file changes that happen between calls of getChangedFile(). If
        there are too many changes, or the file names get too long, the
        buffer overflows and those change notifications are lost.
        See the documentation of ReadDirectoryChangesW() in MSDN for more.
     */
    std::wstring getChangedFile() const;

private:
    // FileMonitor is non-copyable: the copy operations are declared private
    // and intentionally left without a definition.
    FileMonitor(const FileMonitor&);
    FileMonitor& operator=(const FileMonitor&);

    class Impl; //!< Private implementation class
    Impl* mImpl;
};  // FileMonitor

#endif // FILEMONITOR_H

FileMonitor.cpp

#include "FileMonitor.h"
#include <assert.h>
#include <iostream>
#include <list>

// Exclude rarely-used stuff from Windows headers
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#ifndef VC_EXTRALEAN
# define VC_EXTRALEAN
#endif
#include <windows.h>

#ifdef _MSC_VER // Currently only windows is supported

/*! Win32 implementation of FileMonitor.
    Keeps one overlapped ReadDirectoryChangesW() request outstanding on the
    watched directory and polls it, without blocking, from getChangedFile().
 */
class FileMonitor::Impl
{
public:
    /*! Opens \em path and issues the first asynchronous read.
        If the directory cannot be opened or watched, a message is written to
        std::wcerr and the object stays inert: getChangedFile() will then
        always return an empty string.
     */
    Impl(const wchar_t* path, bool recursive, int operationTowatch)
        : mDirectory(INVALID_HANDLE_VALUE), mRecursive(recursive), mOperationTowatch(operationTowatch)
    {
        // Go through size_t instead of int so that the pointer value is not
        // truncated on 64-bit builds.
        assert(reinterpret_cast<size_t>(&mBuffer[0]) % sizeof(DWORD) == 0 && "Address of mBuffer must be 4-byte aligned");

        // Adjust the default value for mOperationTowatch
        if(mOperationTowatch == -1)
            mOperationTowatch = FILE_NOTIFY_CHANGE_LAST_WRITE;

        // The OVERLAPPED structure must be zeroed before its first use.
        memset(&mOverlapped, 0, sizeof(mOverlapped));

        mDirectory = ::CreateFileW(
            path,
            FILE_LIST_DIRECTORY,
            FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
            0,
            OPEN_EXISTING,
            // ReadDirectoryChangesW() needs FILE_FLAG_BACKUP_SEMANTICS
            FILE_FLAG_OVERLAPPED | FILE_FLAG_BACKUP_SEMANTICS,
            0
        );

        // CreateFileW() reports failure with INVALID_HANDLE_VALUE, not NULL.
        if(mDirectory == INVALID_HANDLE_VALUE || !readChange()) {
            closeDirectory();
            std::wcerr << L"Fail to watch directory: " << (path ? path : L"(null)") << std::endl;
        }
    }

    ~Impl()
    {
        closeDirectory();
    }

    /*! Issues (or re-issues) the asynchronous ReadDirectoryChangesW() request
        that fills mBuffer.
        \return true if the request was accepted.
     */
    bool readChange() const
    {
        return ::ReadDirectoryChangesW(
            mDirectory,
            mBuffer, sizeof(mBuffer),
            mRecursive,
            mOperationTowatch,
            NULL,   // bytesReturned, not used for overlapped requests
            &mOverlapped,
            0       // callBack
        ) != 0;
    }

    /*! Returns the oldest queued file name, or an empty string if nothing
        has changed. Never blocks.
     */
    std::wstring getChangedFile() const
    {
        // We poll the directory even if there are entries inside mFiles,
        // so that it's less likely for mBuffer to overflow.
        // Nothing is polled when the directory could not be opened.
        if(mDirectory != INVALID_HANDLE_VALUE)
            collectChanges();

        if(mFiles.empty())
            return std::wstring();

        std::wstring ret = mFiles.front();
        mFiles.pop_front();
        return ret;
    }

private:
    //! Closes the directory handle if it is open; safe to call repeatedly.
    void closeDirectory()
    {
        if(mDirectory != INVALID_HANDLE_VALUE) {
            ::CloseHandle(mDirectory);
            mDirectory = INVALID_HANDLE_VALUE;
        }
    }

    /*! Moves every completed notification from mBuffer into mFiles and
        re-arms the asynchronous read.
     */
    void collectChanges() const
    {
        // For some unknown reason(s) ReadDirectoryChangesW() will report the
        // file twice, therefore we poll twice so that the duplicated entry
        // can be filtered out by queueNotifications().
        for(int pass = 0; pass < 2; ++pass)
        {
            DWORD bytesReturned = 0;
            // Non-blocking query: fails while the request is still pending.
            if(0 == ::GetOverlappedResult(mDirectory, &mOverlapped, &bytesReturned, FALSE))
                return;

            if(bytesReturned == 0) {
                // TODO: To reduce the chance of insufficient buffer,
                // we can move the code to another thread.
                std::wcerr << L"Error returned by ReadDirectoryChangesW(), "
                    L"most likely the internal buffer is too small" << std::endl;
                readChange();
                return;
            }

            queueNotifications(bytesReturned);

            // If re-arming fails we stop polling, but entries that are
            // already queued are still handed out by getChangedFile().
            if(!readChange())
                return;
        }
    }

    /*! Walks the chain of FILE_NOTIFY_INFORMATION records in mBuffer and
        appends their file names to mFiles.
        \param bytesReturned Number of valid bytes in mBuffer.
     */
    void queueNotifications(DWORD bytesReturned) const
    {
        const char* const begin = reinterpret_cast<const char*>(mBuffer);
        // Never trust more bytes than the buffer can hold.
        const size_t valid = bytesReturned < sizeof(mBuffer) ? bytesReturned : sizeof(mBuffer);
        // Size of the fixed part of a record, i.e. everything before FileName.
        const size_t headerSize = static_cast<size_t>(FIELD_OFFSET(FILE_NOTIFY_INFORMATION, FileName));

        size_t offset = 0;
        // Invariant: offset <= valid, so the subtraction cannot wrap around.
        while(valid - offset >= headerSize)
        {
            const FILE_NOTIFY_INFORMATION* p =
                reinterpret_cast<const FILE_NOTIFY_INFORMATION*>(begin + offset);

            // The file name must lie completely inside the valid data.
            if(p->FileNameLength > valid - offset - headerSize)
                break;

            // FileNameLength is in bytes and the name is not null terminated.
            std::wstring fileName(p->FileName, p->FileNameLength / sizeof(wchar_t));

            // Skip duplicated entry
            if(mFiles.empty() || fileName != mFiles.back())
                mFiles.push_back(fileName);

            const size_t next = p->NextEntryOffset;
            // Zero marks the last record; anything pointing outside the valid
            // data is treated as the end as well.
            if(next == 0 || next > valid - offset)
                break;

            offset += next;
        }
    }

public:
    //! Handle of the watched directory, INVALID_HANDLE_VALUE if not watching.
    HANDLE mDirectory;
    bool mRecursive;
    int mOperationTowatch;
    /*! This buffer must be 4-byte aligned, therefore we use int as the type.
        You may change the buffer size to fit your needs.
     */
    mutable int mBuffer[2048];
    mutable OVERLAPPED mOverlapped;
    //! Queue of changed file names, oldest entry first.
    mutable std::list<std::wstring> mFiles;
};  // Impl

// Creates the private implementation object, forwarding all the arguments.
FileMonitor::FileMonitor(const wchar_t* path, bool recursive, int operationTowatch)
    : mImpl(new Impl(path, recursive, operationTowatch))
{
}

// Destroys the private implementation object created by the constructor.
FileMonitor::~FileMonitor()
{
delete mImpl;
}

std::wstring FileMonitor::getChangedFile() const
{
__assume(mImpl); // We know mImpl is always not null, shut off the C++ analysis warning
return mImpl->getChangedFile();
}

#endif // _MSC_VER

Main.cpp

#include "FileMonitor.h"
#include <iostream>
#include <conio.h> // For _kbhit()

// Demo: watches the current directory recursively and prints the name of
// every changed file until a key is pressed.
int main()
{
    FileMonitor monitor(L"./", true);

    std::wcout << L"Create and modify the files in the current directory, "
        L"and the FileMonitor will tell you the name of those files.";
    std::wcout << L" Press any key to quit the program" << std::endl;

    while(!_kbhit())
    {
        // Keep polling the monitor, but a real application should
        // only poll the monitor once in a while.
        // Drain everything that is currently queued before checking the
        // keyboard again.
        for(std::wstring changed = monitor.getChangedFile();
            !changed.empty();
            changed = monitor.getChangedFile())
        {
            std::wcout << changed << std::endl;
        }
    }

    return 0;
}