From a520d98cc8c14b929f031258169b18314425ab10 Mon Sep 17 00:00:00 2001 From: Mario Di Raimondo Date: Fri, 12 Sep 2014 20:00:56 +0200 Subject: [PATCH 1/3] scraping season/episode numbers for sky-it Extended the already present hack to extract such numbers from episode descriptions in Sky IT EPG data. --- src/epggrab/module/opentv.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/epggrab/module/opentv.c b/src/epggrab/module/opentv.c index ded80eb1..f4e24533 100644 --- a/src/epggrab/module/opentv.c +++ b/src/epggrab/module/opentv.c @@ -343,21 +343,30 @@ opentv_parse_event_section if (ev.summary) { regex_t preg; regmatch_t match[3]; + const char *patterns[] = { + " *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", + "([0-9]+)'? Stagione +Ep\\. ?([0-9]+)", /* for Sky IT */ + "([0-9]+)'? Stagione", /* for Sky IT */ + NULL }; + int i; /* Parse Series/Episode * TODO: HACK: this needs doing properly */ - regcomp(&preg, " *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", - REG_ICASE | REG_EXTENDED); - if (!regexec(&preg, ev.summary, 3, match, 0)) { - epg_episode_num_t en; - memset(&en, 0, sizeof(en)); - if (match[1].rm_so != -1) - en.s_num = atoi(ev.summary + match[1].rm_so); - if (match[2].rm_so != -1) - en.e_num = atoi(ev.summary + match[2].rm_so); - save |= epg_episode_set_epnum(ee, &en, src); + for (i=0; patterns[i]; i++) { + regcomp(&preg, patterns[i], REG_ICASE | REG_EXTENDED); + if (!regexec(&preg, ev.summary, 3, match, 0)) { + epg_episode_num_t en; + memset(&en, 0, sizeof(en)); + if (match[1].rm_so != -1) + en.s_num = atoi(ev.summary + match[1].rm_so); + if (match[2].rm_so != -1) + en.e_num = atoi(ev.summary + match[2].rm_so); + save |= epg_episode_set_epnum(ee, &en, src); + regfree(&preg); + break; /* skip other patterns */ + } else + regfree(&preg); } - regfree(&preg); } } From 8a0a5183b08475dba09baf902758d1d9e3b3134c Mon Sep 17 00:00:00 2001 From: Mario Di Raimondo Date: Fri, 12 Sep 2014 21:56:08 +0200 Subject: [PATCH 2/3] improved 
regex management As suggested by perex, moved the patterns and their compiled regexes into global variables, with setup and teardown handled in opentv_init/opentv_done. --- src/epggrab/module/opentv.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/epggrab/module/opentv.c b/src/epggrab/module/opentv.c index f4e24533..17d37b31 100644 --- a/src/epggrab/module/opentv.c +++ b/src/epggrab/module/opentv.c @@ -162,6 +162,15 @@ static epggrab_channel_t *_opentv_find_epggrab_channel * OpenTV event processing * ***********************************************************************/ +/* Patterns for the extraction of season/episode numbers from summary of events*/ +const char *_opentv_se_num_patterns[] = { + " *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", /* for ??? */ + "([0-9]+)'? Stagione +Ep\\. ?([0-9]+)", /* for Sky IT */ + "([0-9]+)'? Stagione", /* for Sky IT */ + NULL }; +regex_t *_opentv_se_num_pregs; + + /* Parse huffman encoded string */ static char *_opentv_parse_string ( opentv_module_t *prov, const uint8_t *buf, int len ) @@ -341,20 +350,13 @@ opentv_parse_event_section epg_genre_list_destroy(egl); } if (ev.summary) { - regex_t preg; regmatch_t match[3]; - const char *patterns[] = { - " *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", - "([0-9]+)'? Stagione +Ep\\. ?([0-9]+)", /* for Sky IT */ - "([0-9]+)'? 
Stagione", /* for Sky IT */ - NULL }; int i; /* Parse Series/Episode * TODO: HACK: this needs doing properly */ - for (i=0; patterns[i]; i++) { - regcomp(&preg, patterns[i], REG_ICASE | REG_EXTENDED); - if (!regexec(&preg, ev.summary, 3, match, 0)) { + for (i = 0; _opentv_se_num_patterns[i]; i++) { + if (!regexec(_opentv_se_num_pregs+i, ev.summary, 3, match, 0)) { epg_episode_num_t en; memset(&en, 0, sizeof(en)); if (match[1].rm_so != -1) @@ -362,10 +364,8 @@ opentv_parse_event_section if (match[2].rm_so != -1) en.e_num = atoi(ev.summary + match[2].rm_so); save |= epg_episode_set_epnum(ee, &en, src); - regfree(&preg); break; /* skip other patterns */ - } else - regfree(&preg); + } } } } @@ -809,6 +809,7 @@ static void _opentv_prov_load ( htsmsg_t *m ) void opentv_init ( void ) { htsmsg_t *m; + int i; /* Load dictionaries */ if ((m = hts_settings_load("epggrab/opentv/dict"))) @@ -824,12 +825,19 @@ void opentv_init ( void ) if ((m = hts_settings_load("epggrab/opentv/prov"))) _opentv_prov_load(m); tvhlog(LOG_DEBUG, "opentv", "providers loaded"); + + /* Compile some recurring regular-expressions */ + for (i = 0; _opentv_se_num_patterns[i]; i++) ; /* count the available patterns (NULL-terminated array)*/ + _opentv_se_num_pregs = calloc(i, sizeof(regex_t)); + for (i = 0; _opentv_se_num_patterns[i]; i++) + regcomp(_opentv_se_num_pregs+i, _opentv_se_num_patterns[i], REG_ICASE | REG_EXTENDED); } void opentv_done ( void ) { opentv_dict_t *dict; opentv_genre_t *genre; + int i; while ((dict = RB_FIRST(&_opentv_dicts)) != NULL) { RB_REMOVE(&_opentv_dicts, dict, h_link); @@ -842,6 +850,10 @@ void opentv_done ( void ) free(genre->id); free(genre); } + + for (i = 0; _opentv_se_num_patterns[i]; i++) + regfree(_opentv_se_num_pregs+i); + free(_opentv_se_num_pregs); } void opentv_load ( void ) From 42de59d34d18587b2a159cbc455e5cd7776b3e16 Mon Sep 17 00:00:00 2001 From: Mario Di Raimondo Date: Sat, 13 Sep 2014 00:49:22 +0200 Subject: [PATCH 3/3] several adjustments - make use of 
ARRAY_SIZE macro; - missing static keyword for global variables; - better patterns for sky-it with possibility to support a description without season specification; - debug messages; --- src/epggrab/module/opentv.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/epggrab/module/opentv.c b/src/epggrab/module/opentv.c index 17d37b31..60f08798 100644 --- a/src/epggrab/module/opentv.c +++ b/src/epggrab/module/opentv.c @@ -163,13 +163,12 @@ static epggrab_channel_t *_opentv_find_epggrab_channel * ***********************************************************************/ /* Patterns for the extraction of season/episode numbers from summary of events*/ -const char *_opentv_se_num_patterns[] = { - " *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", /* for ??? */ - "([0-9]+)'? Stagione +Ep\\. ?([0-9]+)", /* for Sky IT */ - "([0-9]+)'? Stagione", /* for Sky IT */ - NULL }; -regex_t *_opentv_se_num_pregs; - +static const char *_opentv_se_num_patterns[] = { + " *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", /* for ??? */ + " *([0-9]+)'? Stagione +Ep\\. ?([0-9]+) ?-", /* for Sky IT */ + " *([0-9]+)'? Stagione() ?-", /* for Sky IT */ + "() *Ep\\. 
?([0-9]+) ?-" }; /* for Sky IT */ +static regex_t *_opentv_se_num_pregs; /* Parse huffman encoded string */ static char *_opentv_parse_string @@ -355,7 +354,7 @@ opentv_parse_event_section /* Parse Series/Episode * TODO: HACK: this needs doing properly */ - for (i = 0; _opentv_se_num_patterns[i]; i++) { + for (i = 0; i < ARRAY_SIZE(_opentv_se_num_patterns); i++) { if (!regexec(_opentv_se_num_pregs+i, ev.summary, 3, match, 0)) { epg_episode_num_t en; memset(&en, 0, sizeof(en)); @@ -363,6 +362,7 @@ opentv_parse_event_section en.s_num = atoi(ev.summary + match[1].rm_so); if (match[2].rm_so != -1) en.e_num = atoi(ev.summary + match[2].rm_so); + tvhdebug("opentv", " extract from summary season %d episode %d", en.s_num, en.e_num); save |= epg_episode_set_epnum(ee, &en, src); break; /* skip other patterns */ } @@ -827,9 +827,8 @@ void opentv_init ( void ) tvhlog(LOG_DEBUG, "opentv", "providers loaded"); /* Compile some recurring regular-expressions */ - for (i = 0; _opentv_se_num_patterns[i]; i++) ; /* count the available patterns (NULL-terminated array)*/ - _opentv_se_num_pregs = calloc(i, sizeof(regex_t)); - for (i = 0; _opentv_se_num_patterns[i]; i++) + _opentv_se_num_pregs = calloc(ARRAY_SIZE(_opentv_se_num_patterns), sizeof(regex_t)); + for (i = 0; i < ARRAY_SIZE(_opentv_se_num_patterns); i++) regcomp(_opentv_se_num_pregs+i, _opentv_se_num_patterns[i], REG_ICASE | REG_EXTENDED); } @@ -851,7 +850,7 @@ void opentv_done ( void ) free(genre); } - for (i = 0; _opentv_se_num_patterns[i]; i++) + for (i = 0; i < ARRAY_SIZE(_opentv_se_num_patterns); i++) regfree(_opentv_se_num_pregs+i); free(_opentv_se_num_pregs); }