Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Copyright 2015 Florian Philipp
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include <wchar.h>
- /* using wcsrtombs, wcrtomb, wcslen */
- #include <stdlib.h>
- /* using malloc, realloc, free, rand, MB_CUR_MAX */
- #include <string.h>
- /* using memset, memcpy */
- #include <stdio.h>
- /* using printf, perror */
- #include <locale.h>
- /* using setlocale */
- #include <errno.h>
- /* using errno */
- #include <time.h>
- /* using clock_gettime, difftime */
/**
 * Releases a heap block while leaving errno exactly as the caller had it
 *
 * Intended for error paths where the errno of the failed operation still has
 * to be reported after the cleanup.
 *
 * \param buf block to hand to free; passed through unchanged
 */
static void _free_keep_errno(void* buf)
{
  const int saved_errno = errno;

  free(buf);
  /* put back whatever value was current before the call to free */
  errno = saved_errno;
}
/**
 * Converts a wide character string to a multi-byte character string
 *
 * Two-pass strategy: the first pass only measures how many bytes the result
 * needs, the second pass converts into a buffer of exactly that size
 *
 * \param ws a null-terminated wide character string
 * \return the corresponding multi-byte character string (release it with
 * free) or NULL on error
 */
static char* s_wstring_to_string_meas(const wchar_t* ws)
{
  mbstate_t state;
  /* the measuring pass works on its own cursor so that ws stays usable */
  const wchar_t* probe = ws;
  size_t needed;
  char* out;

  memset(&state, 0, sizeof state);
  /* pass 1: a NULL destination makes wcsrtombs count without storing */
  needed = wcsrtombs(NULL, &probe, 0, &state);
  if(needed == (size_t) -1)
    return NULL;
  /* the reported count does not include the terminating null byte */
  needed += 1;
  out = malloc(needed);
  if(out == NULL)
    return NULL;
  /* pass 2: the actual conversion, continuing with the same state object */
  if(wcsrtombs(out, &ws, needed, &state) != (size_t) -1)
    return out;
  _free_keep_errno(out);
  return NULL;
}
/**
 * Converts a wide character string to a multi-byte character string
 *
 * Operates by allocating enough memory for the worst case in which every
 * wchar_t, including the terminator, translates into MB_CUR_MAX bytes, then
 * shrinking the string in the end
 *
 * \param ws a null-terminated wide character string
 * \return the corresponding multi-byte character string (release it with
 * free) or NULL on error. errno is set to ENOMEM when the worst-case size
 * does not fit into a size_t; otherwise it is left as set by the failing
 * library call
 */
static char* s_wstring_to_string_max(const wchar_t* ws)
{
  size_t wlen, len;
  char* buf, *shrunk;
  mbstate_t mbstate;

  wlen = wcslen(ws);
  /* make a worst case estimate. Note that MB_CUR_MAX is a runtime variable.
   * The terminator is budgeted with MB_CUR_MAX bytes as well because a
   * stateful encoding may have to emit a shift sequence in front of the null
   * byte. Refuse lengths for which the product would wrap around; a wrapped
   * size would yield a buffer that is too small and a truncated,
   * unterminated result
   */
  if(wlen >= (size_t) -1 / MB_CUR_MAX) {
    errno = ENOMEM;
    goto err_rtrn;
  }
  len = MB_CUR_MAX * (wlen + 1);
  if(! (buf = malloc(len)))
    goto err_rtrn;
  memset(&mbstate, 0, sizeof(mbstate));
  /* on success len becomes the byte count without the terminating null */
  if((len = wcsrtombs(buf, &ws, len, &mbstate)) == (size_t) -1)
    goto err_free;
  /* realloc leaves buf valid on failure, so err_free may still release it */
  if(! (shrunk = realloc(buf, len + 1)))
    goto err_free;
  return shrunk;
err_free:
  _free_keep_errno(buf);
err_rtrn:
  return NULL;
}
/**
 * Converts a wide character string to a multi-byte character string
 *
 * Helper function for different high-level implementations.
 * Uses an initial guess for the size.
 * If the guessed size is insufficient, larger memory is allocated
 * exponentially.
 *
 * \param ws a null-terminated wide character string
 * \param buflen initial size estimate in bytes. A value of 0 is treated as 1
 * \return the corresponding multi-byte character string (release it with
 * free) or NULL on error. errno is set to ENOMEM when the buffer size cannot
 * be doubled without overflowing; otherwise it is left as set by the failing
 * library call
 */
static char* _s_wstring_to_string_realloc(const wchar_t* ws, size_t buflen)
{
  char* buffer = NULL;
  char* resized;
  size_t bufpos = 0;
  mbstate_t mbstate;

  memset(&mbstate, 0, sizeof(mbstate));
  /* doubling a size of zero never grows the buffer, so start with one byte */
  if(buflen == 0)
    buflen = 1;
  /* to avoid parsing the string twice, we allocate memory speculatively
   * with exponential growth. Then we shrink it at the end.
   * wcsrtombs sets ws to NULL once the terminator has been converted
   */
  while(ws) {
    size_t converted;

    if(! (resized = realloc(buffer, buflen)))
      goto err;
    buffer = resized;
    /* continue right behind the bytes written by earlier rounds */
    converted = wcsrtombs(buffer + bufpos, &ws, buflen - bufpos, &mbstate);
    if(converted == (size_t) -1)
      goto err;
    bufpos += converted;
    if(ws) {
      /* input is left over: double the buffer, guarding against wrap-around */
      if(buflen > (size_t) -1 / 2) {
        errno = ENOMEM;
        goto err;
      }
      buflen *= 2;
    }
  }
  /* shrink buffer to actually required size */
  if(! (resized = realloc(buffer, bufpos + 1)))
    goto err;
  return resized;
err:
  _free_keep_errno(buffer); /* buffer may be NULL */
  return NULL;
}
/**
 * Converts a wide character string to a multi-byte character string
 *
 * Starts with the smallest sensible buffer, one byte per wide character plus
 * the terminator, and lets the shared helper grow it exponentially if needed
 *
 * \param ws a null-terminated wide character string
 * \return the corresponding multi-byte character string or NULL on error
 */
static char* s_wstring_to_string_min(const wchar_t* ws)
{
  const size_t initial_size = wcslen(ws) + 1;

  return _s_wstring_to_string_realloc(ws, initial_size);
}
/**
 * Converts a wide character string to a multi-byte character string
 *
 * Starts from a fixed size that does not depend on the input and lets the
 * shared helper grow the buffer exponentially if that turns out too small.
 * The fixed starting size is 64 bytes, chosen as the size of a cache line
 *
 * \param ws a null-terminated wide character string
 * \return the corresponding multi-byte character string or NULL on error
 */
static char* s_wstring_to_string_guess(const wchar_t* ws)
{
  enum { INITIAL_GUESS = 64 };

  return _s_wstring_to_string_realloc(ws, INITIAL_GUESS);
}
/**
 * Converts a wide character string to a multi-byte character string
 *
 * Operates by converting into separately allocated pieces of exponentially
 * growing size. The pieces are concatenated into one string in the end
 *
 * \param ws a null-terminated wide character string
 * \return the corresponding multi-byte character string (release it with
 * free) or NULL on error, with errno as set by the failing library call
 */
static char* s_wstring_to_string_rope(const wchar_t* ws)
{
  struct rope_piece
  {
    size_t len; /* bytes converted into str, terminator not counted */
    char* str;
  };
  /* Array of exponentially growing strings */
  struct rope_piece* rope;
  /* ropelen counts the entries whose str has been allocated */
  size_t ropelen = 0, ropecap = 64 / sizeof(struct rope_piece);
  /* allocation size. Initial size is a cache line */
  size_t lastreserved = 64;
  size_t i;
  /* total size of the result; starts at 1 for the terminating null byte */
  size_t outlen = 1;
  char* outbuf;
  char* outpos;
  int errno_cpy;
  mbstate_t mbstate;

  memset(&mbstate, 0, sizeof(mbstate));
  if(! (rope = malloc(ropecap * sizeof(struct rope_piece))))
    goto err_rtrn;
  /* Convert the string into increasingly large substrings.
   * wcsrtombs sets ws to NULL once the terminator has been converted
   */
  for(i = 0; ws; ++i, lastreserved *= 2) {
    if(i == ropecap) {
      struct rope_piece* newrope;

      ropecap *= 2;
      if(! (newrope = realloc(rope, ropecap * sizeof(struct rope_piece))))
        goto err_free;
      rope = newrope;
    }
    if(! (rope[i].str = malloc(lastreserved)))
      goto err_free;
    /* from here on the error path has to release rope[i].str as well */
    ropelen = i + 1;
    if((rope[i].len = wcsrtombs(rope[i].str, &ws, lastreserved, &mbstate))
       == (size_t) -1)
      goto err_free;
    outlen += rope[i].len;
  }
  /* allocate the final string. reallocing the first string saves us the
   * hassle of copying its content and freeing it.
   * It also deals with the common case that the first string was big enough
   * and just needs to be shrunk
   */
  if(! (outbuf = realloc(rope[0].str, outlen)))
    goto err_free;
  /* append the remaining strings */
  outpos = outbuf + rope[0].len;
  for(i = 1; i < ropelen; ++i) {
    /* advance the char pointer separately: arithmetic on the void* returned
     * by memcpy is not valid ISO C
     */
    memcpy(outpos, rope[i].str, rope[i].len);
    outpos += rope[i].len;
    free(rope[i].str);
  }
  *outpos = '\0';
  free(rope);
  return outbuf;
err_free:
  /* free all allocated entries. An entry whose allocation failed is not
   * counted in ropelen. free may call munmap so we preserve errno before
   * calling it
   */
  errno_cpy = errno;
  for(i = 0; i < ropelen; ++i)
    free(rope[i].str);
  free(rope);
  errno = errno_cpy;
err_rtrn:
  return NULL;
}
/**
 * Creates n random valid wchar_ts
 *
 * Candidates are drawn with rand() until wcrtomb accepts one in the current
 * locale, so the run time depends on how many of the possible values are
 * convertible in that locale. No terminator is written
 *
 * \param ws output array with room for at least n wide characters
 * \param n number of wide characters to generate
 */
static void make_wchars(wchar_t* ws, size_t n)
{
  mbstate_t lastvalid, cur;
  size_t i;

  memset(&lastvalid, 0, sizeof(lastvalid));
  for(i = 0; i < n; ++i) {
    char buf[MB_CUR_MAX];

    do {
      /* a rejected candidate may have altered cur, so every attempt starts
       * over from the state after the last accepted character
       */
      memcpy(&cur, &lastvalid, sizeof(cur));
      /* the candidate lies in 1..RAND_MAX. The modulo keeps the + 1 from
       * overflowing int on platforms where RAND_MAX equals INT_MAX
       */
      ws[i] = (wchar_t) (rand() % RAND_MAX + 1);
    }
    /* also retry if the value became a null character when narrowed to
     * wchar_t, which would terminate the generated string early
     */
    while(ws[i] == L'\0' || wcrtomb(buf, ws[i], &cur) == (size_t) -1);
    memcpy(&lastvalid, &cur, sizeof(lastvalid));
  }
}
- int main(void)
- {
- typedef char* (*algorithm_t)(const wchar_t*);
- static const algorithm_t algorithms[] = {
- s_wstring_to_string_meas,
- s_wstring_to_string_max,
- s_wstring_to_string_min,
- s_wstring_to_string_guess,
- s_wstring_to_string_rope
- };
- static const char* const names[] = {
- "measure", "max", "min", "guess", "rope"
- };
- const size_t len = 4096, algo_n = sizeof(names) / sizeof(names[0]);
- wchar_t* wstr;
- size_t curlen;
- const char* operation;
- setlocale(LC_ALL, "");
- if(! (wstr = malloc(len * sizeof(wchar_t)))) {
- operation = "Allocating wide characters";
- goto err_rtrn;
- }
- make_wchars(wstr, len - 1);
- wstr[len - 1] = L'\0';
- for(curlen = 1; curlen <= len; curlen *= 2) {
- const wchar_t* cur_wstr;
- size_t repetitions;
- size_t algo_i;
- cur_wstr = wstr + len - curlen;
- repetitions = 4096 * 1024 * 8 / curlen;
- for(algo_i = 0; algo_i < algo_n; ++algo_i) {
- struct timespec starttime, endtime;
- size_t repetition_i;
- double spenttime, time_per_wchar;
- if(clock_gettime(CLOCK_MONOTONIC, &starttime)) {
- operation = "Getting time";
- goto err_free;
- }
- for(repetition_i = 0; repetition_i < repetitions; ++repetition_i) {
- char* mbstr;
- if(! (mbstr = algorithms[algo_i](cur_wstr))) {
- operation = names[algo_i];
- goto err_free;
- }
- free(mbstr);
- }
- if(clock_gettime(CLOCK_MONOTONIC, &endtime)) {
- operation = "Getting time";
- goto err_free;
- }
- spenttime = difftime(endtime.tv_sec, starttime.tv_sec)
- + (endtime.tv_nsec - starttime.tv_nsec) * 1e-9;
- time_per_wchar = spenttime / (repetitions * curlen);
- if(printf("%zu\t%s\t%g\n", curlen, names[algo_i], time_per_wchar) < 0) {
- operation = "Printing results";
- goto err_free;
- }
- }
- }
- free(wstr);
- return 0;
- err_free:
- _free_keep_errno(wstr);
- err_rtrn:
- perror(operation);
- return 1;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement