|
发表于 2019-6-18 00:13:29
|
显示全部楼层
回帖奖励 +100 鱼币
- #include <fstream>
- #include <iostream>
- #include <string>
- #include <algorithm>
- #include <cstdlib>
- #include <boost/ptr_container/ptr_vector.hpp>
- #include <boost/shared_ptr.hpp>
- #include "utf8.h"
- #define WIN32_LEAN_AND_MEAN
- #define WIN32_EXTRA_LEAN
- #define NOMINMAX
- #include <Windows.h>
- typedef boost::shared_ptr<std::wstring> strPtr; // Shared-ownership handle to a std::wstring; within this listing it is referenced only from commented-out lines in DictionaryEntry::parse.
// Advances p forward until it points at the first occurrence of delim in
// [p, end), or until it reaches end.
//
// p     - in/out cursor; on return it addresses the delimiter, or equals end.
// delim - character to search for.
// end   - one-past-the-last character of the range; never dereferenced.
//
// Returns true when the delimiter was found, false when the range was
// exhausted (including the case where p == end on entry).
bool parseTo(const wchar_t *&p, wchar_t delim, const wchar_t * const end)
{
    // The bound is tested before the dereference so that *end is never read.
    while (p != end && *p != delim)
        ++p;
    return p != end;
}
- bool parseToAndSkip(const wchar_t *&p, wchar_t delim, const wchar_t * const end)
- {
- if (!parseTo(p, delim, end))
- return false;
- if (++p == end)
- return false;
- return true;
- }
// Returns a newly allocated, NUL-terminated copy of the characters in
// [begin, end).
//
// The caller owns the result and must release it with delete[].
// end must not precede begin.
wchar_t *copyOf(const wchar_t * const begin, const wchar_t * const end)
{
    const size_t SIZE = end - begin;
    wchar_t *ret = new wchar_t[SIZE + 1];
    // std::copy comes from <algorithm>, which this file already includes;
    // std::memcpy would require <cstring>, which it does not.
    std::copy(begin, end, ret);
    ret[SIZE] = L'\0';
    return ret;
}
- class DictionaryEntry
- {
- private:
- DictionaryEntry(const DictionaryEntry&);
- DictionaryEntry &operator=(const DictionaryEntry&);
- public:
- wchar_t *trad, *pinyin, *english;
- DictionaryEntry(wchar_t *trad, wchar_t *pinyin, wchar_t *english):
- trad(trad), pinyin(pinyin), english(english)
- {
- }
- ~DictionaryEntry()
- {
- delete[] trad;
- delete[] pinyin;
- delete[] english;
- }
- static DictionaryEntry *parse(const wchar_t *line, size_t len)
- {
- const wchar_t * const end = line + len;
- const wchar_t *parseEnd = line;
- if (!parseTo(parseEnd, L' ', end))
- return NULL;
- //strPtr trad(new std::wstring(line, parseEnd));
- wchar_t *trad = copyOf(line, parseEnd);
- line = parseEnd;
- if (!parseToAndSkip(line, L'[', end))
- return NULL;
- parseEnd = line;
- if (!parseTo(parseEnd, L']', end))
- return NULL;
- //strPtr pinyin(new std::wstring(line, parseEnd));
- wchar_t *pinyin = copyOf(line, parseEnd);
- line = parseEnd;
- if (!parseToAndSkip(line, L'/', end))
- return NULL;
- parseEnd = end;
- --parseEnd;
- while (*parseEnd != '/' && parseEnd != line)
- --parseEnd;
- if (parseEnd == line)
- return NULL;
- //strPtr english(new std::wstring(line, parseEnd));
- wchar_t *english = copyOf(line, parseEnd);
- //return new DictionaryEntry(trad, pinyin, english);
- return new DictionaryEntry(trad, pinyin, english);
- }
- };
- class Dictionary
- {
- private:
- boost::ptr_vector<DictionaryEntry> dict;
- public:
- Dictionary()
- {
- std::ifstream in;
- in.open("cedict_ts.u8", std::ios::binary);
- assert(in.good());
- in.seekg(0, std::ios::end);
- const size_t FILE_SIZE = in.tellg();
- in.seekg(0, std::ios::beg);
- char *buf = new char[FILE_SIZE];
- char *bufEnd = buf + FILE_SIZE;
- in.read(buf, FILE_SIZE);
- wchar_t line[4096];
- size_t lineLen = 0;
- char *lineCur = buf;
- while (lineCur < bufEnd)
- {
- wchar_t c = utf8::next(lineCur, bufEnd);
- if (c == '\n' || lineCur == bufEnd)
- {
- // process line
- if (lineLen > 0)
- {
- if (line[0] != '#') // comment
- {
- DictionaryEntry *de = DictionaryEntry::parse(line, lineLen);
- if (de)
- dict.push_back(de);
- }
- lineLen = 0;
- }
- }
- else if (c != 13) // carriage return on windows
- {
- line[lineLen++] = c;
- }
- }
- delete buf;
- }
- size_t length() const
- {
- return dict.size();
- }
- };
- int main(int argc, char *argv[])
- {
- LARGE_INTEGER startTime, endTime, freq;
- QueryPerformanceFrequency(&freq);
- QueryPerformanceCounter(&startTime);
- Dictionary dict;
- QueryPerformanceCounter(&endTime);
- std::cout << "length: " << dict.length() << "\n";
- std::cout << "frequency: " << freq.QuadPart << "\n";
- std::cout << "time: " << (endTime.QuadPart - startTime.QuadPart) / (double)freq.QuadPart << "s\n";
- }
复制代码 |
|