# Patch summary (text ahead of the first "diff --git" header is commentary, not patch content).
#
# CMakeLists.txt       new: builds linecount as a SHARED library and as a STATIC library
#                      (linecount_static), plus the lc executable from main.cpp, which links
#                      against linecount_static.
# create_testfiles.sh  new: uses dd to write test1.txt / test2.txt / test3.txt from /dev/urandom
#                      with bs=1000000 and count=10 / 100 / 1000.
# linecount.cpp        renames windowsize/workercount to buffersize/threadcount; adds FSTAT/STAT
#                      macros beside MMAP and qualifies the POSIX calls with "::"; sets the default
#                      buffersize to 1024*1024; rounds a caller-supplied buffersize up to the
#                      allocation granularity on all platforms (previously Windows only); passes
#                      lastmapsize to munmap; compares the mapping result with MAP_FAILED; makes
#                      threadProc return NULL; passes NULL as the second pthread_join argument;
#                      compares the joined-thread count with m_actual_thread_count.
# linecount.h          adds LINECOUNT_VERSION_MAJOR / LINECOUNT_VERSION_MINOR; renames the
#                      PARAMETERS fields; closes an unbalanced "(" in the Apple #if.
# main.cpp             adds help() and version(); adds -h/--help, -v/--version, -b/--buffersize and
#                      -t/--threadcount; rejects thread counts <= 0; prefixes messages with argv[0].
#
# Review fix: the Windows-side macro is now "#define MAP_FAILED NULL" with no trailing semicolon.
# With the semicolon, "if (mem == MAP_FAILED)" expands to "if (mem == NULL;)" and does not compile.
#
# NOTE(review): several "#include" lines have no header name in this copy, presumably lost when
# the patch was extracted; restore them from the repository before applying.
# NOTE(review): line breaks, blank context lines and indentation were reconstructed from the hunk
# line counts; confirm against the repository if the patch does not apply cleanly.

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..99bba72
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,7 @@
+cmake_minimum_required(VERSION 3.3.0)
+project (linecount)
+
+add_library (linecount SHARED linecount.cpp linecount.h)
+add_library (linecount_static STATIC linecount.cpp linecount.h)
+add_executable (lc main.cpp)
+target_link_libraries (lc linecount_static)
diff --git a/create_testfiles.sh b/create_testfiles.sh
new file mode 100755
index 0000000..6b31d9f
--- /dev/null
+++ b/create_testfiles.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+dd if=/dev/urandom of=test1.txt bs=1000000 count=10
+dd if=/dev/urandom of=test2.txt bs=1000000 count=100
+dd if=/dev/urandom of=test3.txt bs=1000000 count=1000
diff --git a/linecount.cpp b/linecount.cpp
index dff6dd9..d2de1c7 100644
--- a/linecount.cpp
+++ b/linecount.cpp
@@ -8,7 +8,9 @@
 #define LCCLOSEFILE(handle) CloseHandle(handle)
 #define LCINVALIDHANDLE INVALID_HANDLE_VALUE
 #define LCSETREALLASTERROR(err, errstr) { setLastError((err), (errstr)); }
+#define MAP_FAILED NULL
 typedef long long LCFILEOFFSET;
+
 
 #elif defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) // POSIX
 
@@ -16,23 +18,28 @@ typedef long long LCFILEOFFSET;
 #include
 #include
 #include
-#include
-#if (defined (__APPLE__) && defined (__MACH__)
+#if (defined (__APPLE__) && defined (__MACH__))
 #include
 typedef off_t LCFILEOFFSET;
-#define MMAP mmap
+#define MMAP ::mmap
+#define FSTAT ::fstat
+#define STAT ::stat
 #elif defined(__linux__)
 typedef off64_t LCFILEOFFSET;
-#define MMAP mmap64
+#define MMAP ::mmap64
+#define FSTAT ::fstat64
+#define STAT ::stat64
 #else
 typedef off_t LCFILEOFFSET;
-#define MMAP mmap
+#define MMAP ::mmap
+#define FSTAT ::fstat
+#define STAT ::stat
 #endif
 
-#define LCOPENFILE(name) open(name, O_RDONLY)
-#define LCCLOSEFILE(handle) (close(handle) != -1)
+#define LCOPENFILE(name) ::open(name, O_RDONLY)
+#define LCCLOSEFILE(handle) (::close(handle) != -1)
 #define LCINVALIDHANDLE -1
-#define LCSETREALLASTERROR(err, errstr) { int __err = errno; setLastError(__err, strerror(__err)); }
+#define LCSETREALLASTERROR(err, errstr) { int __err = errno; setLastError(__err, ::strerror(__err)); }
 #endif
 
 
@@ -51,37 +58,36 @@ CLineCount::CLineCount(PARAMETERS *parameters)
 
 	// Set line count parameter defaults
 	int cpucount;
+	int allocationgranularity;
 #ifdef _WIN32
 	SYSTEM_INFO sysinfo;
 	GetSystemInfo(&sysinfo);
 	cpucount = sysinfo.dwNumberOfProcessors;
-#elif defined(__linux__)
-	cpucount = sysconf(_SC_NPROCESSORS_ONLN);
-#elif (defined (__APPLE__) && defined (__MACH__))
-	intsize_t count_len = sizeof(cpucount);
-	sysctlbyname("hw.logicalcpu", &cpucount, &count_len, NULL, 0);
+	allocationgranularity = sysinfo.dwAllocationGranularity;
+//#elif defined(__linux__)
 #else
-	cpucount = 1;
+	cpucount = sysconf(_SC_NPROCESSORS_ONLN);
+	allocationgranularity = sysconf(_SC_PAGESIZE);
+//#elif (defined (__APPLE__) && defined (__MACH__))
+//	mmsize_t count_len = sizeof(cpucount);
+//	sysctlbyname("hw.logicalcpu", &cpucount, &count_len, NULL, 0);
+//#else
+//	cpucount = 1;
 #endif
-	m_parameters.workercount = cpucount;
-	m_parameters.windowsize = (1024 * 1024 * 8) / cpucount;
+	m_parameters.threadcount = cpucount;
+	m_parameters.buffersize = (1024 * 1024);
 
 	// Override defaults if specified
 	if (parameters)
 	{
-		if (parameters->windowsize != -1)
+		if (parameters->buffersize != -1)
 		{
-			m_parameters.windowsize = parameters->windowsize;
-#ifdef _WIN32
-			// Window size must be multiple of allocation granularity. Round up.
-			SYSTEM_INFO si;
-			GetSystemInfo(&si);
-			m_parameters.windowsize += (si.dwAllocationGranularity - (m_parameters.windowsize % si.dwAllocationGranularity)) % si.dwAllocationGranularity;
-#endif
+			m_parameters.buffersize = parameters->buffersize;
+			m_parameters.buffersize += (allocationgranularity - (m_parameters.buffersize % allocationgranularity)) % allocationgranularity;
 		}
-		if (parameters->workercount != -1)
+		if (parameters->threadcount != -1)
 		{
-			m_parameters.workercount = parameters->workercount;
+			m_parameters.threadcount = parameters->threadcount;
 		}
 	}
 
@@ -197,14 +203,15 @@ void *threadProc(void *ctx)
 #endif
 {
 	LCTHREADCONTEXT *lctctx = (LCTHREADCONTEXT*)ctx;
-	return lctctx->m_this->countThread(lctctx->thread_number);
+	lctctx->m_this->countThread(lctctx->thread_number);
+	return NULL;
 }
 
 unsigned int CLineCount::countThread(int thread_number)
 {
-	LCFILEOFFSET windowsize = (LCFILEOFFSET)m_parameters.windowsize;
-	LCFILEOFFSET startoffset = windowsize * (LCFILEOFFSET)thread_number;
-	LCFILEOFFSET stride = windowsize * m_actual_thread_count;
+	LCFILEOFFSET buffersize = (LCFILEOFFSET)m_parameters.buffersize;
+	LCFILEOFFSET startoffset = buffersize * (LCFILEOFFSET)thread_number;
+	LCFILEOFFSET stride = buffersize * m_actual_thread_count;
 	LCFILEOFFSET curoffset = startoffset;
 	LCFILEOFFSET lastmapsize = 0;
 	LCLINECOUNT count = 0;
@@ -214,11 +221,12 @@ unsigned int CLineCount::countThread(int thread_number)
 	{
 		if (m_thread_fail)
 		{
+
 			return -1;
 		}
 
 		// Get best file mapping window size
-		size_t mapsize = (size_t)min((m_filesize - curoffset), windowsize);
+		size_t mapsize = (size_t)std::min((m_filesize - curoffset), buffersize);
 
 		// Map view of file
 #ifdef _WIN32
@@ -235,16 +243,17 @@ unsigned int CLineCount::countThread(int thread_number)
 #else
 		if (mem)
 		{
-			if(munmap(mem)!=0)
+			if(munmap(mem, lastmapsize) !=0)
 			{
 				LCSETREALLASTERROR(EINVAL, _T("memory unmap failed"));
 				m_thread_fail = true;
 				return -1;
 			}
 		}
-		mem = MMAP(NULL, mapsize, PROT_READ, MAP_FILE | MAP_SHARED, m_fd, curoffset);
+		mem = MMAP(NULL, mapsize, PROT_READ, MAP_FILE | MAP_SHARED, m_fh, curoffset);
+// printf("%p %lld %lld\n",mem, mapsize, curoffset);
 #endif
-		if (mem == NULL)
+		if (mem == MAP_FAILED)
 		{
 			LCSETREALLASTERROR(EINVAL, _T("memory map failed"));
 			m_thread_fail = true;
@@ -299,7 +308,7 @@ unsigned int CLineCount::countThread(int thread_number)
 #else
 	if (mem)
 	{
-		if (munmap(mem) != 0)
+		if (munmap(mem, lastmapsize) != 0)
 		{
 			LCSETREALLASTERROR(EINVAL, _T("memory unmap failed"));
 			m_thread_fail = true;
@@ -349,8 +358,8 @@ bool CLineCount::countLines(LCLINECOUNT & linecount)
 	}
 	m_filesize = li.QuadPart;
 #else
-	struct stat64 statbuf;
-	if(fstat64(m_fh,&statbuf)!=0)
+	struct STAT statbuf;
+	if(FSTAT(m_fh,&statbuf)!=0)
 	{
 		LCSETREALLASTERROR(EBADF, _T("unable to get file size"));
 		return false;
@@ -366,16 +375,18 @@ bool CLineCount::countLines(LCLINECOUNT & linecount)
 	}
 
 	// Figure out actual thread count
-	LCFILEOFFSET windowcount = (m_filesize + (m_parameters.windowsize - 1)) / m_parameters.windowsize;
-	if (windowcount < (LCFILEOFFSET) m_parameters.workercount)
+	LCFILEOFFSET windowcount = (m_filesize + (m_parameters.buffersize - 1)) / m_parameters.buffersize;
+	if (windowcount < (LCFILEOFFSET) m_parameters.threadcount)
 	{
 		m_actual_thread_count = (int)windowcount;
 	}
 	else
 	{
-		m_actual_thread_count = m_parameters.workercount;
+		m_actual_thread_count = m_parameters.threadcount;
 	}
 
+// printf("act: %d\n",m_actual_thread_count);
+
 #ifdef _WIN32
 	// Prepare file mapping
 	m_filemapping = CreateFileMapping(m_fh, NULL, PAGE_READONLY, 0, 0, NULL);
@@ -405,7 +416,7 @@ bool CLineCount::countLines(LCLINECOUNT & linecount)
 #ifdef _WIN32
 		success = (WaitForSingleObject(m_threads[i], INFINITE) == WAIT_OBJECT_0);
 #else
-		success = pthread_join(m_threads[i]) == 0;
+		success = pthread_join(m_threads[i], NULL) == 0;
 #endif
 	}
 
@@ -429,7 +440,7 @@ bool CLineCount::countLines(LCLINECOUNT & linecount)
 		return false;
 	}
 
-	if (complete != m_parameters.workercount)
+	if (complete != m_actual_thread_count)
 	{
 		setLastError(ECHILD, _T("thread join failed"));
 		return false;
diff --git a/linecount.h b/linecount.h
index 4da2bb0..fbe1701 100644
--- a/linecount.h
+++ b/linecount.h
@@ -1,6 +1,9 @@
 #ifndef __INC_LINECOUNT_H
 #define __INC_LINECOUNT_H
 
+#define LINECOUNT_VERSION_MAJOR 1
+#define LINECOUNT_VERSION_MINOR 0
+
 ///////////////////////////////////////////// Headers
 
 ////////////// Platform independent
@@ -18,6 +21,7 @@
 #include
 #elif defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) // POSIX
 #include
+#include
 #define _T(x) x
 #define TCHAR char
 #endif
@@ -45,7 +49,7 @@ BEGIN_LINECOUNT_NAMESPACE;
 	typedef std::string LCSTRING;
 	typedef int LCFILEHANDLE;
 	typedef errno_t LCERROR;
-	#if (defined (__APPLE__) && defined (__MACH__)
+	#if (defined (__APPLE__) && defined (__MACH__))
 		typedef off_t LCFILEOFFSET;
 		#define LCLINECOUNTFMT "%lld"
 	#elif defined(__linux__)
@@ -66,8 +70,8 @@ public:
 
 	struct PARAMETERS
 	{
-		size_t windowsize;
-		int workercount;
+		size_t buffersize;
+		int threadcount;
 	};
 
 private:
diff --git a/main.cpp b/main.cpp
index 097ccf0..7fa6f38 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,17 +1,46 @@
 #include"linecount.h"
+
 #ifdef _WIN32
+
 #include
+
 #else
+
+#include
 #define _tprintf printf
 #define _ftprintf fprintf
 #define _tcscmp strcmp
+#define _tcslen strlen
 #define _ttoi atoi
 #define _tcstoui64 strtoull
 #define _T(x) x
+#define TCHAR char
+
 #endif
 
 using namespace LineCount;
 
+//////////////////////////////////////////////////////
+
+
+void help(const TCHAR *argv0)
+{
+	_ftprintf(stderr, _T("usage: %s [options] \n"), argv0);
+	_ftprintf(stderr, _T(" -h --help print this usage and exit\n"));
+	_ftprintf(stderr, _T(" -b --buffersize size of buffer per-thread to use when reading (default is 1MB)\n"));
+	_ftprintf(stderr, _T(" -t --threadcount number of threads to use (defaults to number of cpu cores)\n"));
+	_ftprintf(stderr, _T(" -v --version print version information and exit\n"));
+}
+
+
+void version(void)
+{
+	_tprintf(_T("lc (linecount) %d.%2.2d\nCopyright (c) 2015 Christien Rioux\n"), LINECOUNT_VERSION_MAJOR, LINECOUNT_VERSION_MINOR);
+}
+
+//////////////////////////////////////////////////////
+
+
 #if defined(WIN32) && defined(_UNICODE)
 int wmain(int argc, TCHAR **argv)
 #else
@@ -23,27 +52,43 @@ int main(int argc, char **argv)
 	int posparam = 0;
 
 	CLineCount::PARAMETERS params;
-	params.windowsize = -1;
-	params.workercount = -1;
+	params.buffersize = -1;
+	params.threadcount = -1;
 
 	TCHAR *filename = NULL;
 
+	if(argc==1)
+	{
+		help(argv[0]);
+		exit(0);
+	}
+
 	while (arg < argc)
 	{
-		if (_tcscmp(argv[arg], _T("--windowsize")) == 0)
+		if (_tcscmp(argv[arg], _T("-h")) == 0 || _tcscmp(argv[arg], _T("--help")) == 0)
+		{
+			help(argv[0]);
+			exit(0);
+		}
+		else if (_tcscmp(argv[arg], _T("-v")) == 0 || _tcscmp(argv[arg], _T("--version")) == 0)
+		{
+			version();
+			exit(0);
+		}
+		else if (_tcscmp(argv[arg], _T("-b")) == 0 || _tcscmp(argv[arg], _T("--buffersize")) == 0)
 		{
 			arg++;
 			if (arg == argc)
 			{
-				_ftprintf(stderr, _T("missing argument to --windowsize"));
+				_ftprintf(stderr, _T("%s: missing argument to %s\n"), argv[0], argv[arg-1]);
 				return 1;
 			}
 
-			_TCHAR *wsstr = argv[arg];
+			TCHAR *wsstr = argv[arg];
 
 			// Check for size multipliers
 			size_t multiplier = 1;
-			_TCHAR *lastchar = wsstr + (_tcslen(wsstr) - 1);
+			TCHAR *lastchar = wsstr + (_tcslen(wsstr) - 1);
 			if (*lastchar == _T('k') || *lastchar == _T('K'))
 			{
 				multiplier = 1024;
@@ -60,20 +105,25 @@ int main(int argc, char **argv)
 				lastchar = 0;
 			}
 
-			_TCHAR *endptr;
-			params.windowsize = ((size_t)_tcstoui64(argv[arg], &endptr, 10)) * multiplier;
+			TCHAR *endptr;
+			params.buffersize = ((size_t)_tcstoui64(argv[arg], &endptr, 10)) * multiplier;
 		}
-		else if (_tcscmp(argv[arg], _T("--workercount")) == 0)
+		else if (_tcscmp(argv[arg], _T("-t")) == 0 || _tcscmp(argv[arg], _T("--threadcount")) == 0)
 		{
 			arg++;
 
 			if (arg == argc)
 			{
-				_ftprintf(stderr, _T("missing argument to --workercount"));
+				_ftprintf(stderr, _T("%s: Missing argument to %s\n"), argv[0], argv[arg-1]);
 				return 1;
 			}
 
-			params.workercount = _ttoi(argv[arg]);
+			params.threadcount = _ttoi(argv[arg]);
+			if(params.threadcount<=0)
+			{
+				_ftprintf(stderr, _T("%s: Invalid thread count\n"), argv[0]);
+				return 1;
+			}
 		}
 		else
 		{
@@ -83,7 +133,7 @@ int main(int argc, char **argv)
 			}
 			else
 			{
-				_ftprintf(stderr, _T("too many arguments"));
+				_ftprintf(stderr, _T("%s: Too many arguments\n"), argv[0]);
 				return 1;
 			}
 			posparam++;
@@ -94,7 +144,7 @@ int main(int argc, char **argv)
 
 	if (posparam != 1)
 	{
-		_ftprintf(stderr, _T("missing required argument"));
+		_ftprintf(stderr, _T("%s: Missing required argument\n"), argv[0]);
 		return 1;
 	}
 
@@ -106,7 +156,7 @@ int main(int argc, char **argv)
 		LCERROR err = lc.lastError();
 		LCSTRING errstr = lc.lastErrorString();
 
-		_ftprintf(stderr, _T("error (%d): %s\n"), err, errstr.c_str());
+		_ftprintf(stderr, _T("%s: Error %d (%s)\n"), argv[0], err, errstr.c_str());
 		return err;
 	}
 
@@ -117,7 +167,7 @@ int main(int argc, char **argv)
 		LCERROR err = lc.lastError();
 		LCSTRING errstr = lc.lastErrorString();
 
-		_ftprintf(stderr, _T("%s: Error %d: (%s)\n"), argv[0], err, errstr.c_str());
+		_ftprintf(stderr, _T("%s: Error %d: (%s)\n"), argv[0], err, errstr.c_str());
 		return err;
 	}
 