From a19e3705a228735eacadd713b28d49fe33bb726b Mon Sep 17 00:00:00 2001 From: sijanec Date: Wed, 7 Apr 2021 23:15:16 +0200 Subject: UTF-8, image support, memory leaks fixed --- Makefile | 2 +- README.md | 7 ++- debian/changelog | 8 +++ src/api.c | 180 ++++++++++++++++++++++++++++++++++++------------------- src/hp.html | 5 +- src/httpd.c | 29 ++++++--- src/i18n.h | 2 +- src/lib.c | 3 + src/log.c | 2 +- src/main.c | 11 ++++ src/structs.c | 26 ++++---- src/url.c | 6 +- 12 files changed, 193 insertions(+), 88 deletions(-) diff --git a/Makefile b/Makefile index 5db1e09..4a72257 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ default: echo ', 0' >> tmp/hp.xxd xxd -i < src/osdd.xml > tmp/osdd.xxd echo ', 0' >> tmp/osdd.xxd - gcc -Wall -Wextra -pedantic -Wno-unused-parameter -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c + gcc -Wall -Wextra -pedantic -Wno-unused-parameter -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -lm -osear.c install: mkdir -p $(DESTDIR)/usr/bin/ diff --git a/README.md b/README.md index 23e5460..c51bd35 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,8 @@ service sear.c start * a POSIX system * GNU C library -* GNU compiler collection (it's written in GNU C - it uses anonymous functions) +* GNU compiler collection (it's written in GNU C - it uses ~~anonymous~~ nested functions) + - anonymous functions were a pain to debug * GNU Make * libxml2-dev (for the simple HTML/1.0 client and HTML parser) * libmicrohttpd-dev (for serving results - use a reverse proxy, such as nginx, for HTTPS) @@ -34,7 +35,8 @@ make * run the daemon - it starts listening on HTTP port 7327 (remember it by picturing phone keyboard buttons with letters SEAR (; ) * optional: create a reverse proxy for HTTPS * navigate to [http://localhost:7327](http://localhost:7327) and do a couple of searches to see if everything works -* the horseshoe button redirects directly to the first result without wasting time on the results page. use if you feel lucky. (BP) +* the horseshoe button redirects directly to the first result without wasting time on the results page. use if you feel lucky. (BP) +* the painting button performs a search for images. PRIVACY WARNING: images are loaded directly from servers (not from google) * check logs by navigating to /logs.html * program also writes all logs to standard error @@ -55,3 +57,4 @@ before downloading, check that the build passed, indicated below on the badge: ![screenshot in chromium 2](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc2.png) ![screenshot in chromium 3](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc3.png) ![screenshot in chromium 4](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc4.png) +![screenshot in chromium 5](https://cdn.sijanec.eu/img/2021/04/sear.c_prtsc5.png) diff --git a/debian/changelog b/debian/changelog index 21823c1..bec307a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +sear.c (0.0.11-1) stable; urgency=low + + * added image search, fixed some memory leaks with valgrind + * stopped using anonymous functions, hard to debug + * TODO: still some memory leaks in xml documents + + -- Anton Luka Šijanec Tue, 07 Apr 2021 23:00:00 +0200 + sear.c (0.0.10-1) stable; urgency=low * test diff --git a/src/api.c b/src/api.c index 76431cc..9a9bbc8 100644 --- a/src/api.c +++ b/src/api.c @@ -48,9 +48,10 @@ char * sc_api (struct sc_cache * c, char * body, char * headers, int isfmt, char buf_length += readstatus; if (buf_sizeof-buf_length < SC_HTTP_RBUFSIZE) { buf_sizeof *= SC_REALLOC_K; - buf = realloc(buf, sizeof(char)*buf_sizeof); + buf = realloc(buf, sizeof(char)*buf_sizeof); /* this IS safe, no matter how hard valgrind complains */ } } + buf[buf_length++] = '\0'; if (readstatus == -1) SC_LOG(SC_LOG_ERROR, c, "readstatus == -1, endpoint: %s", endpoint_formatted ? endpoint_formatted : endpoint); xmlNanoHTTPClose(r); @@ -123,7 +124,7 @@ int sc_fix_url (char ** h) { /* fixes a (result) URL in-place (removes tracking } /* TODO: be pedantic and remove utm_source and other tracking bullshit */ return 1; } -struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q) { /* check for cached queries first! */ +struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q, SC_OPT_TYPE opt) { /* check4cachedB4 */ /* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */ /* if query is not NULL, it MUST be initialized */ /* @@ -147,7 +148,13 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s based http request-response based user interface so we can ask the user to complete the captcha. this is not yet implemeted and will be hard work. */ - int rs; + int rs = 1; + char * xpath = NULL; + char * descclass = NULL; + char * titleclass = NULL; + char * imageclass = NULL; + htmlDocPtr xmldoc = NULL; + char * txtdoc = NULL; if (!s || !c) { rs = -1; goto rc; @@ -159,10 +166,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s qwasgiven++; char * us = malloc(sizeof(char)*strlen(s)*3+1); urlencode(us, s); - char * xpath = NULL; - char * descclass = NULL; - char * titleclass = NULL; - char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100", us); + txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100&ie=UTF-8%s", us, (opt&SC_OPT_IMAGE) ? "&tbm=isch" : ""); // fprintf(stdout, "%s\n", txtdoc); free(us); if (!txtdoc) { @@ -170,91 +174,141 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s rs = -2; goto rc; } - titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}"); - descclass = sc_find_class(txtdoc, "{word-break:break-word}"); - if (!titleclass || !descclass) { - SC_LOG(SC_LOG_ERROR, c, "!titleclass || !descclass"); - rs = -3; - goto rc; + if (opt & SC_OPT_IMAGE) { + imageclass = sc_find_class(txtdoc, "{font-family:Roboto,Helvetica,Arial,sans-serif}"); + if (!imageclass) { + SC_LOG(SC_LOG_ERROR, c, "!imageclass, txtdoc = %s", txtdoc); + rs = -3; + goto rc; + } + } else { + titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}"); + descclass = sc_find_class(txtdoc, "{word-break:break-word}"); + if (!titleclass || !descclass) { + SC_LOG(SC_LOG_ERROR, c, "!titleclass || !descclass"); + rs = -4; + goto rc; + } } #define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* result a */ #define SC_GTXD /* description */ "../..//table//span[@class='%s']" #define SC_GTXB /* breadcrumbs */ ".//span[@class='%s']" +#define SC_GTXI "//div[@class='%s']//a" #define SC_GTR q->results[q->results_length-1] - xpath = malloc(strlen(titleclass)+strlen(SC_GTXF)); - sprintf(xpath, SC_GTXF, titleclass); /* whenever starts with titleclas */ - htmlDocPtr xmldoc = parseHtmlDocument(txtdoc, NULL); + xpath = malloc(strlen((opt & SC_OPT_IMAGE) ? imageclass : titleclass)+strlen((opt & SC_OPT_IMAGE) ? SC_GTXI : SC_GTXF)); + sprintf(xpath, (opt & SC_OPT_IMAGE) ? SC_GTXI : SC_GTXF, (opt & SC_OPT_IMAGE) ? imageclass : titleclass); + xmldoc = parseHtmlDocument(txtdoc, NULL); if (qwasgiven) /* as you can see, when q is given, queries will be write-locked for the whole XML processing time! */ SC_CWLE(c, c->queries_lock); q->results_length = 0; gnu_code_start; - eachNodeX(xmldoc, xpath, - lambda(void, (xmlNodePtr node, void * data), - { - if (node->type == XML_ELEMENT_NODE) { - xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href"); - if (href) { - char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href"); /* fuck rules, I will rewrite it anyways <= hi future me */ - sc_fix_url(&hreflink); - char * x = malloc(strlen(descclass)+strlen(SC_GTXD)); - char * xbread = malloc(strlen(descclass)+strlen(SC_GTXB)); - sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */); - sprintf(xbread, SC_GTXB, descclass /* remember, kids, GNU C is fucking legendary */); - xmlNodePtr descnode = nthNodeXN(node, x, 0); - if (!descnode) /* description may be above, see https://support.google.com/websearch?p=featured_snippets */ - descnode = nthNodeXN(node, "../../div/div", 0); - xmlNodePtr breadnode = nthNodeXN(node, xbread, 0); - free(x); - free(xbread); - if (q->results_sizeof <= q->results_length) - SC_BIGGER_ARRAY(q->results, sc_result); - q->results_length++; - SC_GTR->query = q; - SC_GTR->title = (char *) xmlNodeGetContent(node->children); - if (!SC_GTR->title) { - SC_GTR->title = malloc(strlen(SC_I18N_NO_TITLE)+1); - strcpy(SC_GTR->title, SC_I18N_NO_TITLE); - } - SC_GTR->url = hreflink; - if (!SC_GTR->url) { - SC_GTR->url = malloc(strlen(SC_I18N_NO_HREFLINK)+1); - strcpy(SC_GTR->url, SC_I18N_NO_HREFLINK); - } - SC_GTR->desc = (char *) xmlNodeGetContent(descnode); - if (!SC_GTR->desc) { - SC_GTR->desc = malloc(strlen(SC_I18N_NO_DESCRIPTION)+1); - strcpy(SC_GTR->desc, SC_I18N_NO_DESCRIPTION); - } - SC_GTR->breadcrumbs = (char *) xmlNodeGetContent(breadnode); - if (!SC_GTR->breadcrumbs) { - SC_GTR->breadcrumbs = malloc(strlen(SC_GTR->url)+1); - strcpy(SC_GTR->breadcrumbs, SC_GTR->url); - } - } + void sc_query_google_eachnode (xmlNodePtr node, void * data) { + if (node->type == XML_ELEMENT_NODE) { + xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href"); + if (href) { + char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href"); /* xmlGetProp copies and allocates */ + if (!hreflink) { + SC_LOG(SC_LOG_ERROR, c, "!hreflink"); + rs = -5; + return; + } + if (opt & SC_OPT_IMAGE) { + char * imgurl = NULL; /* do not free those when allocated by sscanf, as they will directly go into the struct. */ + char * imgrefurl = NULL; /* easy, huh? */ + SC_LOG(SC_LOG_DEBUG, c, "hreflink = %s", hreflink); + sscanf(hreflink, "/imgres?imgurl=%m[^&]&imgrefurl=%m[^&]", &imgurl, &imgrefurl); + if (!imgurl && !imgrefurl) { + SC_LOG(SC_LOG_ERROR, c, "!imgurl && !imgrefurl"); + /* rs = -6; */ /* we continue running not fail because of a single picture */ + free(imgurl); + free(imgrefurl); + return; /* check! */ + } + urldecode(imgurl, imgurl); + urldecode(imgrefurl, imgrefurl); + if (q->results_sizeof <= q->results_length) + SC_BIGGER_ARRAY(q->results, sc_result, 1); + q->results_length++; + SC_GTR->query = q; + SC_GTR->title = NULL; /* can't get title from here, would have to load /imgres, which is bloat */ + SC_GTR->url = imgrefurl; + SC_GTR->desc = imgurl; + SC_GTR->breadcrumbs = NULL; + } else { + char * orig_hreflink_for_free = hreflink; + sc_fix_url(&hreflink); + char * x = malloc(strlen(descclass)+strlen(SC_GTXD)); + char * xbread = malloc(strlen(descclass)+strlen(SC_GTXB)); + sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */); + sprintf(xbread, SC_GTXB, descclass /* remember, kids, GNU C is fucking legendary */); + xmlNodePtr descnode = nthNodeXN(node, x, 0); + if (!descnode) /* description may be above, see https://support.google.com/websearch?p=featured_snippets */ + descnode = nthNodeXN(node, "../../div/div", 0); + xmlNodePtr breadnode = nthNodeXN(node, xbread, 0); + free(x); + free(xbread); + if (q->results_sizeof <= q->results_length) + SC_BIGGER_ARRAY(q->results, sc_result, 1); + q->results_length++; + SC_GTR->query = q; + char * cp = (char *) xmlNodeGetContent(node->children); + if (cp) { + SC_GTR->title = malloc(strlen(cp)+1); + strcpy(SC_GTR->title, cp); + xmlFree(cp); + } else SC_GTR->title = NULL; + if (hreflink) { + SC_GTR->url = malloc(strlen(hreflink)+1); + strcpy(SC_GTR->url, hreflink); + xmlFree(orig_hreflink_for_free); + } else SC_GTR->url = NULL; + cp = (char *) xmlNodeGetContent(descnode); + if (cp) { + SC_GTR->desc = malloc(strlen(cp)+1); + strcpy(SC_GTR->desc, cp); + xmlFree(cp); + } else SC_GTR->desc = NULL; + cp = (char *) xmlNodeGetContent(breadnode); + if (cp) { + SC_GTR->breadcrumbs = malloc(strlen(cp)+1); + strcpy(SC_GTR->breadcrumbs, cp); + xmlFree(cp); } } - ), - NULL); + } + } + } + eachNodeX(xmldoc, xpath, sc_query_google_eachnode, NULL); gnu_code_end; + if (rs < 0) { + SC_LOG(SC_LOG_ERROR, c, "rs < 0 (rs == %d)", rs); + if (qwasgiven) + SC_CUE(c, c->queries_lock); + goto rc; + } q->cache = c; q->lookup_time = time(NULL); q->engines = SC_ENGINE_GOOGLE; q->string = realloc(q->string, strlen(s)+1); + q->opt = opt; strcpy(q->string, s); if (!qwasgiven) { SC_CWLE(c, c->queries_lock); if (c->queries_sizeof <= c->queries_length) - SC_BIGGER_ARRAY(c->queries, sc_query); + SC_BIGGER_ARRAY(c->queries, sc_query, 0); c->queries_length++; #define SC_GTQ c->queries[c->queries_length-1] SC_GTQ = q; } SC_CUE(c, c->queries_lock); - xmlFreeDoc(xmldoc); rc: + if (!qwasgiven && rs < 0) + sc_query_free(q); + xmlFreeDoc(xmldoc); free(txtdoc); free(titleclass); free(descclass); + free(imageclass); free(xpath); return (rs < 0) ? NULL : q; } diff --git a/src/hp.html b/src/hp.html index 18169da..6bb15db 100644 --- a/src/hp.html +++ b/src/hp.html @@ -58,6 +58,9 @@ .result p { margin-top: 0.314159265358em; } + .result img { + height: 10em; + } @@ -65,7 +68,7 @@ - +

diff --git a/src/httpd.c b/src/httpd.c index 656ad92..6e2c4bf 100644 --- a/src/httpd.c +++ b/src/httpd.c @@ -11,14 +11,24 @@ char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string } #define SC_HRF "

%s " \ "%s

%s

" -#define SC_HRA i, safeurl, i, safetitle, safebreadcrumbs, safebody - char * safetitle = htmlspecialchars(q->results[i]->title); +#define SC_HIF "" +#define SC_HRA i, safeurl ? safeurl : SC_I18N_NO_HREFLINK, i, safetitle ? safetitle : SC_I18N_NO_TITLE, \ + safebreadcrumbs ? safebreadcrumbs : safeurl ? safeurl : SC_I18N_NO_HREFLINK, safebody ? safebody : SC_I18N_NO_DESCRIPTION + char * safetitle = htmlspecialchars(q->results[i]->title); /* htmlspecialchars returns NULL if input is null */ char * safebody = htmlspecialchars(q->results[i]->desc); char * safeurl = htmlspecialchars(q->results[i]->url); char * safebreadcrumbs = htmlspecialchars(q->results[i]->breadcrumbs); - size_t ws = snprintf(NULL, 0, SC_HRF, SC_HRA); - SC_HRC(resultshtml, ws); - resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, SC_HRA); + size_t ws; + if (q->opt & SC_OPT_IMAGE) { + ws = snprintf(NULL, 0, SC_HIF, SC_HRA); + SC_HRC(resultshtml, ws); + resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HIF, SC_HRA); + } else { + ws = snprintf(NULL, 0, SC_HRF, SC_HRA); + SC_HRC(resultshtml, ws); + resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, SC_HRA); + } free(safebreadcrumbs); free(safetitle); free(safebody); @@ -103,6 +113,9 @@ int sc_httpd (void * cls, char * location = "//git.sijanec.eu/sijanec/sear.c"; char * content_type = "text/html"; int status_code = MHD_HTTP_OK; + SC_OPT_TYPE opt = 0; + if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "i")) + opt |= SC_OPT_IMAGE; if (!host) host = ""; struct sc_query * q = NULL; @@ -143,18 +156,18 @@ int sc_httpd (void * cls, retry: SC_CRLE(c, c->queries_lock); for (size_t i = 0; i < c->queries_length; i++) - if (!strcmp(c->queries[i]->string, query)) + if (!strcmp(c->queries[i]->string, query) && c->queries[i]->opt == opt) q = c->queries[i]; if (q) { response = sc_queryhtml(q); /* MHD_create_response_from_buffer will free response (; */ if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f") && q->results_length > 0) { status_code = 307; - location = q->results[0]->url; + location = q->results[0]->url ? q->results[0]->url : SC_I18N_NO_HREFLINK; } SC_CUE(c, c->queries_lock); } else { SC_CUE(c, c->queries_lock); - sc_query_google(query, c, NULL); + sc_query_google(query, c, NULL, opt); if (already_retried++) { char * safequery = htmlspecialchars(query); response = malloc(strlen((char*) sc_hp)+strlen(safequery)*2+strlen(SC_I18N_HP_ERROR_HEADING)+strlen(SC_I18N_HP_ERROR_BODY)); diff --git a/src/i18n.h b/src/i18n.h index 7335100..9b5a7cc 100644 --- a/src/i18n.h +++ b/src/i18n.h @@ -11,7 +11,7 @@ #define SC_I18N_LOCKING "zaklepanje" #define SC_I18N_FAILED "ni uspelo" #define SC_I18N_HP_ERROR_HEADING "napaka!" -#define SC_I18N_HP_ERROR_BODY "Pri pridobivanju rezultatov je api klic odvrnil s kodo, ki označuje neuspelo stanje. " \ +#define SC_I18N_HP_ERROR_BODY "Pridobivanje rezultatov ni uspelo. Mogoče ni rezultatov. " \ "Preberite dnevniške zapise." #define SC_I18N_LOGS "dnevniški zapisi" #define SC_I18N_LOGS_ERROR "napaka pri branju dnevniških datotek" diff --git a/src/lib.c b/src/lib.c index 5c0576e..8659431 100644 --- a/src/lib.c +++ b/src/lib.c @@ -84,6 +84,7 @@ void printNode (xmlNodePtr node, void * data) { #define gnu_code_start \ _Pragma ("GCC diagnostic push") \ _Pragma ("GCC diagnostic ignored \"-Wpedantic\"") + _Pragma ("GCC diagnostic ignored \"-Wformat=\"") #define gnu_code_end \ _Pragma ("GCC diagnostic pop") /* this is the definition of the anonymous function - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC */ @@ -94,6 +95,8 @@ void printNode (xmlNodePtr node, void * data) { &l_anonymous_functions_name; \ }) char * htmlspecialchars (const char * i) { /* remember to free the output */ + if (!i) + return NULL; size_t s = 128; char * o = malloc(s); size_t w = 0; diff --git a/src/log.c b/src/log.c index 5e4dc16..6e3fbd0 100644 --- a/src/log.c +++ b/src/log.c @@ -32,7 +32,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f if (pthread_rwlock_wrlock(lock)) return -3; if (c->logentries_sizeof <= c->logentries_length) - SC_BIGGER_ARRAY(c->logentries, sc_logentry); + SC_BIGGER_ARRAY(c->logentries, sc_logentry, 1); c->logentries_length++; size_t strlenm = strlen(m); size_t va_count = parse_printf_format(m, 0, NULL); diff --git a/src/main.c b/src/main.c index 4bf41eb..8b43389 100644 --- a/src/main.c +++ b/src/main.c @@ -5,8 +5,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -46,6 +48,9 @@ char sc_securitytxt[] = "# This content information is provided by the developer #include #include /* this is new in my programs. I am now using _sizeof for the actual alloced size of the array and _length for the count of elements in array. this is done to decrease number of calls to realloc&amis */ +void sc_signalhander (int s) { + return; +} int main (int argc, char ** argv) { int rs = 0; struct sc_cache * c = sc_cache_init(); @@ -54,13 +59,19 @@ int main (int argc, char ** argv) { rs = 1; goto rc; } + xmlInitParser(); d = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION, SC_HTTP_PORT, NULL, NULL, &sc_httpd, c, MHD_OPTION_END); if (!d) { rs = 2; goto rc; } + signal(SIGTERM, sc_signalhander); + signal(SIGINT, sc_signalhander); pause(); + fprintf(stderr, "cleaning up!\n"); + fflush(stderr); rc: + xmlCleanupParser(); sc_cache_free(c); MHD_stop_daemon(d); return rs; diff --git a/src/structs.c b/src/structs.c index 2d83f74..dce460e 100644 --- a/src/structs.c +++ b/src/structs.c @@ -1,4 +1,4 @@ -#define SC_ALLOC_CHUNK 128 /* how many x to alloc when allocing (for performance so we don't call malloc over and over again) */ +#define SC_ALLOC_CHUNK 1 /* how many x to alloc when allocing (for performance so we don't call malloc over and over again) */ #define SC_IN_STRUCT_ARRAY(type, name) _Atomic(type **) name; _Atomic(size_t) name##_sizeof; _Atomic(size_t) name##_length #define SC_CWLE(c, name) (pthread_rwlock_wrlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0) #define SC_CRLE(c, name) (pthread_rwlock_rdlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0) @@ -11,12 +11,14 @@ #define SC_LOG_WARNING (1 << 1) #define SC_LOG_INFO (1 << 2) #define SC_LOG_DEBUG (1 << 3) -#define SC_BIGGER_ARRAY(name, type) do { \ - name = realloc(name, sizeof(name[0])*name##_sizeof*SC_REALLOC_K); \ - for (size_t i = name##_sizeof; i < name##_sizeof*SC_REALLOC_K; i++) \ +#define SC_BIGGER_ARRAY(name, type, shallinit) do { \ + name = realloc(name, sizeof(name[0])*ceil(name##_sizeof*SC_REALLOC_K)); \ + for (size_t i = name##_sizeof; shallinit && (i < ceil(name##_sizeof*SC_REALLOC_K)); i++) \ name[i] = type##_init(); \ - name##_sizeof = name##_sizeof*SC_REALLOC_K; \ + name##_sizeof = ceil(name##_sizeof*SC_REALLOC_K); /* ceil je ZELO pomemben, če je chunk 1 recimo */ \ } while (0); +#define SC_OPT_TYPE unsigned char +#define SC_OPT_IMAGE (1 << 0) struct sc_logentry { unsigned char type; /* SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO, SC_LOG_DEBUG */ size_t line; @@ -30,8 +32,8 @@ struct sc_logentry * sc_logentry_init (); /* defined in log.c */ struct sc_result { struct sc_query * query; /* nofree - free from sc_cache */ - char * url; /* yesfree */ - char * desc; /* yesfree */ + char * url; /* yesfree - url of referer page when image searching */ + char * desc; /* yesfree - url of image when image searching */ char * title; /* yesfree */ time_t date; /* some search engines like to extract a date from a website, store that here - not implemented */ unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implementd */ @@ -62,6 +64,7 @@ struct sc_query { char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */ time_t lookup_time; /* time of last lookup */ unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */ + SC_OPT_TYPE opt; /* some options */ }; struct sc_query * sc_query_init () { struct sc_query * q = calloc(1, sizeof(struct sc_query)); @@ -80,6 +83,7 @@ int sc_query_free (struct sc_query * q) { free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */ for (size_t i = 0; i < q->results_sizeof; i++) sc_result_free(q->results[i]); + free(q->results); free(q); return 1; } @@ -95,9 +99,9 @@ struct sc_cache * sc_cache_init() { c->logentries_sizeof = SC_ALLOC_CHUNK; c->queries = calloc(c->queries_sizeof, sizeof(struct sc_query *)); c->logentries = calloc(c->logentries_sizeof, sizeof(struct sc_logentry *)); - for (size_t i = 0; i < c->queries_sizeof; i++) { - c->queries[i] = sc_query_init(); - c->queries[i]->cache = c; + for (size_t i = 0; i < c->logentries_sizeof; i++) { + /* c->queries[i] = sc_query_init(); */ /* queries are not inited for performance reasons, they are inited by query function */ + /* c->queries[i]->cache = c; */ c->logentries[i] = sc_logentry_init(); } #define SC_CILI(name) do { name##_lock = malloc(sizeof(pthread_rwlock_t)); pthread_rwlock_init(name##_lock, NULL); } while (0) @@ -108,11 +112,13 @@ struct sc_cache * sc_cache_init() { int sc_cache_free(struct sc_cache * c) { if (!c) return -1; + fprintf(stderr, "c->queries_sizeof = %lu\n", c->queries_sizeof); for (size_t i = 0; i < c->queries_sizeof; i++) sc_query_free(c->queries[i]); free(c->queries); for (size_t i = 0; i < c->logentries_sizeof; i++) sc_logentry_free(c->logentries[i]); + free(c->logentries); #define SC_CFLD(name) do { pthread_rwlock_destroy(name##_lock); free(name##_lock); } while(0) SC_CFLD(c->queries); SC_CFLD(c->logentries); diff --git a/src/url.c b/src/url.c index df93138..13081a0 100644 --- a/src/url.c +++ b/src/url.c @@ -1,4 +1,6 @@ int urlencode (char * o, const char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) { + if (!o || !i) + return -2; size_t written = 0; /* o CANNOT be equal to i, unlike in urldecode */ for (; *i; i++) { if (isalnum(*i) || *i == '.' || *i == '_' || *i == '-' || *i == '~') { @@ -12,6 +14,8 @@ int urlencode (char * o, const char * i /* o must have at least strlen(i)*3+1 by return 1; } int urldecode (char * o, const char * i /* o must have at least strlen(i)+1 bytes memory allocated */) { + if (!o || !i) + return -2; size_t written = 0; /* o can be equal to i for decoding in-place */ char buf[] = "00"; for (; *i; i++) { @@ -20,7 +24,7 @@ int urldecode (char * o, const char * i /* o must have at least strlen(i)+1 byte buf[1] = *++i; if (!buf[0] || !buf[1]) { /* malformed */ o[written++] = '\0'; - return 0; + return -1; } o[written++] = strtol(buf, NULL, 16); } else { -- cgit v1.2.3