Merge pull request #486 from diraimondo/master

moved opentv patterns from code-level to the provider configuration file
This commit is contained in:
perexg 2014-09-19 22:34:54 +02:00
commit 885d9ff077
4 changed files with 156 additions and 75 deletions

View file

@ -12,5 +12,11 @@
],
"summary": [
64, 65, 66, 67, 68, 69, 70, 71
],
"season_num": [
" *\\(S ?([0-9]+),? Ep? ?[0-9]+\\)"
],
"episode_num": [
" *\\(S ?[0-9]+,? Ep? ?([0-9]+)\\)"
]
}

View file

@ -12,5 +12,35 @@
],
"summary": [
64, 65, 66, 67, 68, 69, 70, 71
],
"season_num": [
"([0-9]+)'?a? Stagione +Ep\\. ?[0-9]+[a-z]?",
"([0-9]+)'?a? Stagione -? ?Puntata ?[0-9]+",
"([0-9]+)'?a? Stagione"
],
"episode_num": [
"[0-9]+'?a? Stagione +Ep\\. ?([0-9]+)[a-z]?",
"[0-9]+'?a? Stagione -? ?Puntata ?([0-9]+)",
"^ *Ep\\. ?([0-9]+)[a-z]?",
"^ *Puntata ?([0-9]+)"
],
"part_num": [
"[0-9]+'?a? Stagione +Ep\\. ?[0-9]+([a-z]?)",
"^ *Ep\\. ?[0-9]+([a-z]?)"
],
"subtitle": [
"[0-9]+'?a? Stagione +Ep\\. ?[0-9]+[A-Za-z]? -? ?'(([^']*(' [^A-Z0-9-])?('[^ '])?)+)'",
"Ep\\. ?[0-9]+[A-Za-z]? -? ?'(([^']*(' [^A-Z0-9-])?('[^ '])?)+)'",
"[0-9]+'?a? Stagione -? ?'(([^']*(' [^A-Z0-9-])?('[^ '])?)+)'",
"[0-9]+'?a? Stagione -? ?Puntata ?[0-9]+[A-Za-z]? \"\" *([^\"]+) *\"\""
],
"__reference": [
"4' Stagione Ep.9B - 'L'Hub' Gli agenti ...: sara' Ward ...",
"3a Stagione - Puntata 1 ...",
"4' Stagione - 'Title' ...",
"Ep.9A ...",
"Puntata 5 ...",
"Ep.4 - 'Title' ...",
"7 Stagione Puntata 8 \"\" Title \"\" ..."
]
}

View file

@ -13,5 +13,11 @@
],
"summary": [
64, 65, 66, 67, 68, 69, 70, 71
],
"season_num": [
" *\\(S ?([0-9]+),? Ep? ?[0-9]+\\)"
],
"episode_num": [
" *\\(S ?[0-9]+,? Ep? ?([0-9]+)\\)"
]
}

View file

@ -59,6 +59,15 @@ typedef struct opentv_genre
RB_ENTRY(opentv_genre) h_link;
} opentv_genre_t;
/* One extraction pattern: its source text together with the compiled regex */
typedef struct opentv_pattern
{
char *text; /* pattern source string (heap copy, also used in log messages) */
regex_t *compiled; /* compiled expression, or NULL when compilation failed */
TAILQ_ENTRY(opentv_pattern) p_links; /* linkage inside an opentv_pattern_list */
} opentv_pattern_t;
/* Ordered list of patterns; lists are walked head to tail and the first
 * pattern yielding a non-empty capture wins */
TAILQ_HEAD(opentv_pattern_list, opentv_pattern);
typedef struct opentv_pattern_list opentv_pattern_list_t;
/* Provider configuration */
typedef struct opentv_module_t
{
@ -72,7 +81,10 @@ typedef struct opentv_module_t
int *summary;
opentv_dict_t *dict;
opentv_genre_t *genre;
opentv_pattern_list_t *p_snum;
opentv_pattern_list_t *p_enum;
opentv_pattern_list_t *p_pnum;
opentv_pattern_list_t *p_subt;
} opentv_module_t;
/*
@ -163,24 +175,6 @@ static epggrab_channel_t *_opentv_find_epggrab_channel
* OpenTV event processing
* ***********************************************************************/
/* Patterns for the extraction of season/episode numbers from summary of events*/
static const char *_opentv_se_num_patterns[] = {
" *\\(S ?([0-9]+),? Ep? ?([0-9]+)\\)", /* for ??? */
"([0-9]+)'?a? Stagione +Ep\\. ?([0-9]+)([a-z]?)", /* for Sky IT, ex.: 4' Stagione Ep.9B ... */
"([0-9]+)'?a? Stagione -? ?Puntata ?([0-9]+)", /* for Sky IT, ex.: 3a Stagione - Puntata 1 ... */
"([0-9]+)'?a? Stagione()", /* for Sky IT, ex.: 4' Stagione ... */
"^() *Ep\\. ?([0-9]+)([a-z]?)", /* for Sky IT, ex.: Ep.9A ... */
"^() *Puntata ?([0-9]+)" }; /* for Sky IT, ex.: Puntata 5 ... */
static regex_t *_opentv_se_num_pregs;
/* Patterns for the extraction of subtitles from summary of events*/
static const char *_opentv_subtitle_patterns[] = {
"[0-9]+'?a? Stagione +Ep\\. ?[0-9]+[A-Za-z]? -? ?'(([^']*(' [^A-Z0-9])?('[^ '])?)+)'", /* for Sky IT, ex.: 1' Stagione Ep.7 - 'L'Hub' Gli agenti ...: sara' Ward ... // 4' Stagione Ep.9 'Title' ... */
"Ep\\. ?[0-9]+[A-Za-z]? -? ?'(([^']*(' [^A-Z0-9])?('[^ '])?)+)'", /* for Sky IT, ex.: Ep.4 - 'Title' ... */
"[0-9]+'?a? Stagione -? ?'(([^']*(' [^A-Z0-9])?('[^ '])?)+)'", /* for Sky IT, ex.: 4' Stagione - 'P.R.' ... */
"[0-9]+'?a? Stagione -? ?Puntata ?[0-9]+[A-Za-z]? \"\" *([^\"]+) *\"\""}; /* for Sky IT, ex.: 7 Stagione Puntata 8 "" Title "" ... */
static regex_t *_opentv_subtitle_pregs;
/* Parse huffman encoded string */
static char *_opentv_parse_string
( opentv_module_t *prov, const uint8_t *buf, int len )
@ -271,6 +265,29 @@ static int _opentv_parse_event
return slen+4;
}
/*
 * Run every pattern of a list against `text`, in list order, and copy the
 * first non-empty capture group #1 into `buf`.
 *
 * The capture is truncated to fit `size_buf - 1` bytes, trailing whitespace
 * is stripped and the result is always NUL-terminated. Patterns that failed
 * to compile (compiled == NULL) are skipped, as are matches whose capture is
 * empty after stripping.
 *
 * @param buf       destination buffer
 * @param size_buf  size of `buf` in bytes
 * @param text      string to search
 * @param l         pattern list, may be NULL
 * @return `buf` when a pattern produced a non-empty capture, NULL otherwise
 */
static void *_opentv_apply_pattern_list(char *buf, size_t size_buf, const char *text, opentv_pattern_list_t *l)
{
  regmatch_t match[2];
  opentv_pattern_t *p;
  size_t size;

  if (!l || !buf || !text || size_buf == 0)
    return NULL;

  /* search and report the first match */
  TAILQ_FOREACH(p, l, p_links) {
    if (!p->compiled)
      continue;
    if (regexec(p->compiled, text, 2, match, 0) || match[1].rm_so == -1)
      continue;

    /* clamp in size_t so the comparison is never signed/unsigned mixed */
    size = (size_t)(match[1].rm_eo - match[1].rm_so);
    if (size > size_buf - 1)
      size = size_buf - 1;

    /* <ctype.h> functions require an unsigned char value: EPG text may hold
     * bytes >= 0x80, which are negative when plain char is signed */
    while (size > 0 && isspace((unsigned char)text[match[1].rm_so + size - 1]))
      size--;

    memcpy(buf, text + match[1].rm_so, size);
    buf[size] = '\0';
    if (size) {
      tvhtrace("opentv"," pattern \"%s\" matches with '%s'", p->text, buf);
      return buf;
    }
  }
  return NULL;
}
/* Parse an event section */
static int
opentv_parse_event_section
@ -323,11 +340,11 @@ opentv_parse_event_section
/* Summary / Description */
if (ev.summary) {
tvhdebug("opentv", " summary %s", ev.summary);
tvhdebug("opentv", " summary '%s'", ev.summary);
save |= epg_broadcast_set_summary(ebc, ev.summary, lang, src);
}
if (ev.desc) {
tvhdebug("opentv", " desc %s", ev.desc);
tvhdebug("opentv", " desc '%s'", ev.desc);
save |= epg_broadcast_set_description(ebc, ev.desc, lang, src);
}
@ -356,7 +373,7 @@ opentv_parse_event_section
while (size > 0 && isspace(ev.title[size - 1]))
ev.title[--size] = '\0';
tvhdebug("opentv", " title %s", ev.title);
tvhdebug("opentv", " title '%s'", ev.title);
save |= epg_episode_set_title(ee, ev.title, lang, src);
}
if (ev.cat) {
@ -366,44 +383,36 @@ opentv_parse_event_section
epg_genre_list_destroy(egl);
}
if (ev.summary) {
regmatch_t match[4];
char buf[1024];
int i,size;
epg_episode_num_t en;
/* Parse Series/Episode */
for (i = 0; i < ARRAY_SIZE(_opentv_se_num_patterns); i++) {
if (!regexec(_opentv_se_num_pregs+i, ev.summary, 4, match, 0)) {
epg_episode_num_t en;
memset(&en, 0, sizeof(en));
if (match[1].rm_so != -1)
en.s_num = atoi(ev.summary + match[1].rm_so);
if (match[2].rm_so != -1)
en.e_num = atoi(ev.summary + match[2].rm_so);
if (match[3].rm_so != -1) {
if (ev.summary[match[3].rm_so] >= 'a' && ev.summary[match[3].rm_so] <= 'z')
en.p_num = ev.summary[match[3].rm_so] - 'a' + 1;
else
if (ev.summary[match[3].rm_so] >= 'A' && ev.summary[match[3].rm_so] <= 'Z')
en.p_num = ev.summary[match[3].rm_so] - 'A' + 1;
}
tvhdebug("opentv", " extract from summary season %d episode %d part %d", en.s_num, en.e_num, en.p_num);
save |= epg_episode_set_epnum(ee, &en, src);
break; /* skip other patterns */
}
memset(&en, 0, sizeof(en));
/* search for season number */
if (_opentv_apply_pattern_list(buf, sizeof(buf), ev.summary, mod->p_snum))
if ((en.s_num = atoi(buf)))
tvhtrace("opentv"," extract season number %d", en.s_num);
/* ...for episode number */
if (_opentv_apply_pattern_list(buf, sizeof(buf), ev.summary, mod->p_enum))
if ((en.e_num = atoi(buf)))
tvhtrace("opentv"," extract episode number %d", en.e_num);
/* ...for part number */
if (_opentv_apply_pattern_list(buf, sizeof(buf), ev.summary, mod->p_pnum)) {
if (buf[0] >= 'a' && buf[0] <= 'z')
en.p_num = buf[0] - 'a' + 1;
else
if (buf[0] >= 'A' && buf[0] <= 'Z')
en.p_num = buf[0] - 'A' + 1;
if (en.p_num)
tvhtrace("opentv"," extract part number %d", en.p_num);
}
/* save any found number */
if (en.s_num || en.e_num || en.p_num)
save |= epg_episode_set_epnum(ee, &en, src);
/* Parse Subtitle */
for (i = 0; i < ARRAY_SIZE(_opentv_subtitle_patterns); i++) {
if (!regexec(_opentv_subtitle_pregs+i, ev.summary, 2, match, 0) && match[1].rm_so != -1) {
size = MIN(match[1].rm_eo - match[1].rm_so, sizeof(buf) - 1);
while (size > 0 && isspace(ev.summary[match[1].rm_so + size - 1]))
size--;
memcpy(buf, ev.summary + match[1].rm_so, size);
buf[size] = '\0';
tvhdebug("opentv", " extract from summary subtitle %s", buf);
save |= epg_episode_set_subtitle(ee, buf, lang, src);
break; /* skip other patterns */
}
/* ...for subtitle */
if (_opentv_apply_pattern_list(buf, sizeof(buf), ev.summary, mod->p_subt)) {
tvhtrace("opentv", " extract subtitle '%s'", buf);
save |= epg_episode_set_subtitle(ee, buf, lang, src);
}
}
}
@ -664,6 +673,30 @@ static int* _pid_list_to_array ( htsmsg_t *m )
return ret;
}
/*
 * Build a pattern list from a configuration list of regular expressions.
 *
 * Every string entry is copied and compiled with REG_EXTENDED only (matching
 * is therefore case-sensitive). An entry that fails to compile is kept in the
 * list with `compiled == NULL` so that it can still be reported and released;
 * such entries are ignored when the list is applied.
 *
 * @param l  configuration list, may be NULL
 * @return newly allocated list (to be released with
 *         _opentv_free_pattern_list), or NULL when `l` is NULL or the list
 *         head cannot be allocated
 */
static opentv_pattern_list_t* _opentv_compile_pattern_list ( htsmsg_t *l )
{
  opentv_pattern_list_t *ret;
  opentv_pattern_t *pattern;
  htsmsg_field_t *f;
  const char *str;

  if (!l) return NULL;

  ret = calloc(1, sizeof(*ret));
  if (!ret) return NULL;
  TAILQ_INIT(ret);

  HTSMSG_FOREACH(f, l) {
    /* NOTE(review): assumes htsmsg_field_get_str() yields NULL for entries
     * that are not strings - confirm; strdup(NULL) would be undefined */
    str = htsmsg_field_get_str(f);
    if (!str) {
      tvhlog(LOG_WARNING, "opentv", " ignoring non-string pattern entry");
      continue;
    }

    pattern = calloc(1, sizeof(*pattern));
    if (!pattern)
      break;
    pattern->text = strdup(str);
    if (!pattern->text) {
      free(pattern);
      break;
    }

    /* on allocation failure the entry simply stays uncompiled */
    pattern->compiled = calloc(1, sizeof(regex_t));
    if (pattern->compiled) {
      if (regcomp(pattern->compiled, pattern->text, REG_EXTENDED)) {
        tvhlog(LOG_WARNING, "opentv", " error compiling pattern \"%s\"", pattern->text);
        free(pattern->compiled);
        pattern->compiled = NULL;
      } else {
        /* only report success when the pattern really compiled */
        tvhtrace("opentv", " compiled pattern \"%s\"", pattern->text);
      }
    }
    TAILQ_INSERT_TAIL(ret, pattern, p_links);
  }
  return ret;
}
static int _opentv_genre_load_one ( const char *id, htsmsg_t *m )
{
htsmsg_field_t *f;
@ -741,12 +774,32 @@ static void _opentv_dict_load ( htsmsg_t *m )
htsmsg_destroy(m);
}
/*
 * Release a pattern list together with everything it owns: each pattern's
 * text, its compiled regex (when present), the pattern nodes and finally the
 * list head itself. The pointer must not be used after this call.
 *
 * @param l  list to destroy, may be NULL
 */
static void _opentv_free_pattern_list ( opentv_pattern_list_t *l )
{
  opentv_pattern_t *p;

  if (!l) return;

  /* Pop from the head instead of using TAILQ_FOREACH: the iterator of
   * TAILQ_FOREACH reads p->p_links after the body, i.e. after free(p) */
  while ((p = TAILQ_FIRST(l)) != NULL) {
    TAILQ_REMOVE(l, p, p_links);
    free(p->text);
    if (p->compiled) {
      regfree(p->compiled);
      free(p->compiled);
    }
    free(p);
  }

  /* the head is heap-allocated by the list constructor */
  free(l);
}
static void _opentv_done( void *m )
{
opentv_module_t *mod = (opentv_module_t *)m;
free(mod->channel);
free(mod->title);
free(mod->summary);
_opentv_free_pattern_list (mod->p_snum);
_opentv_free_pattern_list (mod->p_enum);
_opentv_free_pattern_list (mod->p_pnum);
_opentv_free_pattern_list (mod->p_subt);
}
static int _opentv_tune
@ -795,7 +848,6 @@ static int _opentv_prov_load_one ( const char *id, htsmsg_t *m )
genre = _opentv_genre_find(str);
else
genre = NULL;
/* Exists (we expect some duplicates due to config layout) */
sprintf(ibuf, "opentv-%s", id);
@ -806,7 +858,7 @@ static int _opentv_prov_load_one ( const char *id, htsmsg_t *m )
mod = (opentv_module_t*)
epggrab_module_ota_create(calloc(1, sizeof(opentv_module_t)),
ibuf, nbuf, 2, &ops, NULL);
/* Add provider details */
mod->dict = dict;
mod->genre = genre;
@ -818,6 +870,10 @@ static int _opentv_prov_load_one ( const char *id, htsmsg_t *m )
mod->summary = _pid_list_to_array(sl);
mod->channels = &_opentv_channels;
mod->ch_rem = epggrab_module_ch_rem;
mod->p_snum = _opentv_compile_pattern_list(htsmsg_get_list(m, "season_num"));
mod->p_enum = _opentv_compile_pattern_list(htsmsg_get_list(m, "episode_num"));
mod->p_pnum = _opentv_compile_pattern_list(htsmsg_get_list(m, "part_num"));
mod->p_subt = _opentv_compile_pattern_list(htsmsg_get_list(m, "subtitle"));
return 1;
}
@ -847,7 +903,6 @@ static void _opentv_prov_load ( htsmsg_t *m )
void opentv_init ( void )
{
htsmsg_t *m;
int i;
/* Load dictionaries */
if ((m = hts_settings_load("epggrab/opentv/dict")))
@ -863,21 +918,12 @@ void opentv_init ( void )
if ((m = hts_settings_load("epggrab/opentv/prov")))
_opentv_prov_load(m);
tvhlog(LOG_DEBUG, "opentv", "providers loaded");
/* Compile some recurring regular-expressions */
_opentv_se_num_pregs = calloc(ARRAY_SIZE(_opentv_se_num_patterns), sizeof(regex_t));
for (i = 0; i < ARRAY_SIZE(_opentv_se_num_patterns); i++)
assert(!regcomp(_opentv_se_num_pregs+i, _opentv_se_num_patterns[i], REG_ICASE | REG_EXTENDED));
_opentv_subtitle_pregs = calloc(ARRAY_SIZE(_opentv_subtitle_patterns), sizeof(regex_t));
for (i = 0; i < ARRAY_SIZE(_opentv_subtitle_patterns); i++)
assert(!regcomp(_opentv_subtitle_pregs+i, _opentv_subtitle_patterns[i], REG_EXTENDED));
}
void opentv_done ( void )
{
opentv_dict_t *dict;
opentv_genre_t *genre;
int i;
while ((dict = RB_FIRST(&_opentv_dicts)) != NULL) {
RB_REMOVE(&_opentv_dicts, dict, h_link);
@ -890,13 +936,6 @@ void opentv_done ( void )
free(genre->id);
free(genre);
}
for (i = 0; i < ARRAY_SIZE(_opentv_se_num_patterns); i++)
regfree(_opentv_se_num_pregs+i);
free(_opentv_se_num_pregs);
for (i = 0; i < ARRAY_SIZE(_opentv_subtitle_patterns); i++)
regfree(_opentv_subtitle_pregs+i);
free(_opentv_subtitle_pregs);
}
void opentv_load ( void )