From b9a4e9c8a68dcb6b17617f4f7a5ad5ad3298bfa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Wed, 9 Mar 2016 22:05:38 +0100 Subject: Imported Upstream version 1.7.1 --- lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm | 246 +++++-- lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm | 435 ------------ lib/gcstar/GCPlugins/GCfilms/GCImdb.pm | 888 +++++++++++++------------ lib/gcstar/GCPlugins/GCgames/GCGameSpot.pm | 365 ++++++---- lib/gcstar/GCPlugins/GCgames/GCJeuxVideoCom.pm | 512 +++++++------- lib/gcstar/GCPlugins/GCgames/GCJeuxVideoFr.pm | 78 ++- lib/gcstar/GCPlugins/GCgames/GCMobyGames.pm | 287 ++++---- 7 files changed, 1381 insertions(+), 1430 deletions(-) delete mode 100644 lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm (limited to 'lib/gcstar/GCPlugins') diff --git a/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm index 457194a..636fd4f 100644 --- a/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm +++ b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm @@ -39,11 +39,11 @@ use GCPlugins::GCcomics::GCcomicsCommon; my ( $self, $word ) = @_; if ($self->{searchField} eq 'series') { - return "http://www.bedetheque.com/index.php?R=1&RechSerie=$word"; + return "http://www.bedetheque.com/search/albums?RechSerie=$word"; } elsif ($self->{searchField} eq 'writer') { - return "http://www.bedetheque.com/index.php?R=1&RechAuteur=$word"; + return "http://www.bedetheque.com/search/albums?RechAuteur=$word"; } else { @@ -64,6 +64,7 @@ use GCPlugins::GCcomics::GCcomicsCommon; my @array = split( /#/, $url ); $self->{site_internal_id} = $array[1]; + # print "getItemUrl $url\n\n"; return $url if $url =~ /^http:/; return "http://www.bedetheque.com/" . $url; } @@ -143,14 +144,30 @@ use GCPlugins::GCcomics::GCcomicsCommon; } else { - $html =~ m/(
.+)/; - - #$html =~ m/(
.+)/; $html = $1; $self->{isResultsTable} = 0; $self->{parsingEnded} = 0; $self->{isCover} = 0; - $self->{itemIdx}++;; + $self->{isTabs} = 0; + $self->{isLabel} = 0; + $self->{itemIdx}++; + # + $self->{doneColourist} = 0 ; + $self->{doneCost} = 0 ; + $self->{doneFormat} = 0 ; + $self->{doneIllustrator} = 0 ; + $self->{doneISBN} = 0 ; + $self->{doneNumberboards} = 0 ; + $self->{donePublishdate} = 0 ; + $self->{donePublishdate} = 0 ; + $self->{donePublisher} = 0 ; + $self->{doneSerie} = 0 ; + $self->{doneSynopsis} = 0 ; + $self->{doneTitle} = 0 ; + $self->{doneVolume} = 0 ; + $self->{doneWriter} = 0 ; } return $html; @@ -178,69 +195,100 @@ use GCPlugins::GCcomics::GCcomicsCommon; #$self->{itemsList}[ $self->{itemIdx} ]->{url} = # "http://www.bedetheque.com/" . $attr->{href}; } - elsif ( $tagname eq "i" ) - { + elsif ( ( $tagname eq "ul" ) && ( $attr->{class} eq "search-list" ) ) { + $self->{inTable} = 1; + } + elsif ( ($self->{inTable}) && ( $tagname eq "li" ) ) { + $self->{isVolume} = 1; + } + elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{title} eq "tooltip" ) ) { + $self->{itemsList}[$self->{itemIdx}]->{image} = $attr->{rel}; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + } + elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "titre" ) ) { + $self->{isTitle} = 1; + } + elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "serie" ) ) { $self->{isSerie} = 1; } + elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "num" ) ) { + $self->{isNumber} = 1; + } } else { - if ( ( $tagname eq "table" ) && ( $attr->{id} eq "albums_serie" ) ) { + if ( ( $tagname eq "ul" ) && ( $attr->{class} eq "search-list" ) ) { $self->{inTable} = 1; } - elsif ( ($self->{inTable}) && ( $tagname eq "td" ) && ( $attr->{class} eq "num" ) ) { + elsif ( ($self->{inTable}) && ( $tagname eq "li" ) ) { $self->{itemIdx}++; $self->{isVolume} = 1; } - elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/serie-/ ) ) { + elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{title} eq "tooltip" ) ) { + $self->{itemsList}[$self->{itemIdx}]->{image} = $attr->{rel}; $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + } + elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "titre" ) ) { $self->{isTitle} = 1; } + elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "serie" ) ) { + $self->{isSerie} = 1; + } elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "br" ) && ( $self->{startSynopsis} ) ) { # This is a stop! for br ;-) and complementary of the p in the end section # should be ( ( $tagname eq "p" ) || ( $tagname eq "br" ) ) $self->{isSynopsis} = 0; $self->{startSynopsis} = 0; - $self->{parsingEnded} = 1; + $self->{parsingEnded} = 1; } } } else { - if ( $tagname eq "title") - { - $self->{isIssue} = 1; - $self->{isTitle} = 1; - } - - if ( ( $self->{isCover} == 0 ) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/Couvertures\/.*\.[jJ][pP][gG]/ ) ) - { - $self->{curInfo}->{image} = 'http://www.bedetheque.com/' . $attr->{href}; + if ( ( $self->{isCover} == 0 ) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/Couvertures\/.*\.[jJ][pP][gG]/ ) ) { + $self->{curInfo}->{image} = $attr->{href}; $self->{isCover} = 1; } - elsif ( ( $tagname eq "div") && ( $attr->{class} eq "titre" ) ) { - $self->{isVolume} = 1; - } - elsif ( ( $tagname eq "ul") && ( $attr->{class} eq "infos" ) ) { - $self->{isResultsTable} = 1; + elsif ( $tagname eq "label" ) { + $self->{isLabel} = 1; } - elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) { - $self->{current_field} = ''; - $self->{openlabel} = 1; + elsif ( ( $tagname eq "ul" ) && ( $attr->{class} eq "tabs-album" ) && ( ! $self->{doneSerie} ) ) { + $self->{isTabs} = 1; } - elsif ( ( $tagname eq "div" ) && ( $attr->{class} eq "title" ) && ( !defined( $self->{curInfo}->{title} ) || ( $self->{curInfo}->{title} =~ /^$/ ) ) ) { + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "name" ) && ( ! $self->{doneTitle} ) ) { $self->{isTitle} = 1; } - elsif ( ( $tagname eq "span" ) && ( $attr->{class} eq "type" ) ) { + elsif ( ( $tagname eq "span" ) && ( $attr->{class} eq "titre-rubrique" ) && ( ! $self->{doneSerie} ) && ( $self->{isTabs} ) ) { $self->{isSerie} = 1; } - elsif ( $tagname eq "em" ) { + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "author" ) && ( ! $self->{doneWriter} ) ) { + $self->{isWriter} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "illustrator" ) && ( ! $self->{doneIllustrator} ) ) { + $self->{isIllustrator} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "illustrator" ) && ( ! $self->{doneColourist} ) && ( $self->{doneIllustrator} ) ) { + $self->{isColourist} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "publisher" ) && ( ! $self->{donePublisher} ) ) { + $self->{isPublisher} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "isbn" ) && ( ! $self->{doneISBN} ) ) { + $self->{isISBN} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "numberOfPages" ) && ( ! $self->{doneNumberboards} ) ) { + $self->{isNumberboards} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "description" ) && ( ! $self->{doneSynopsis} ) ) { $self->{isSynopsis} = 1; } - elsif ( ( $tagname eq "a" ) && ( $attr->{class} eq "titre eo" ) ) { - if ( $attr->{title} =~ m/.+\s-(\d+)-\s.+/ ) { - $self->{curInfo}->{volume} = $1; - } + elsif ( ( $tagname eq "ul" ) && ( $attr->{class} eq "liste-albums" ) ) { + $self->{doneColourist} = 1; # To avoid getting mess with illustrator } +# elsif ( ( $tagname eq "a" ) && ( $attr->{class} eq "titre eo" ) ) { +# if ( $attr->{title} =~ m/.+\s-(\d+)-\s.+/ ) { +# $self->{curInfo}->{volume} = $1; +# } +# } } } @@ -260,6 +308,17 @@ use GCPlugins::GCcomics::GCcomicsCommon; $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext; $self->{isSerie} = 0; } + elsif ( $self->{isTitle} == 1) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ( $self->{isNumber} == 1) + { + $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext; + $self->{itemsList}[ $self->{itemIdx} ]->{volume} =~ s/#//; + $self->{isNumber} = 0; + } else { if ($self->{isCollection} == 1) @@ -296,8 +355,8 @@ use GCPlugins::GCcomics::GCcomicsCommon; "Scénario :" => 'writer', "Dessin :" => 'illustrator', "Couleurs :" => 'colourist', - "Dépot légal :" => 'publishdate', - "Achevé impr. :" => 'printdate ', + "Dépot légal :" => 'publishdate', + "Achevé impr. :" => 'printdate ', "Estimation :" => 'cost', "Editeur :" => 'publisher', "Collection : " => 'collection', @@ -317,27 +376,86 @@ use GCPlugins::GCcomics::GCcomicsCommon; $self->{current_field} = ""; } } - elsif ( $self->{isVolume} ) - { - $self->{curInfo}->{volume} = $origtext; - $self->{isVolume} = 0 ; - } - - if ( $self->{isTitle} ) - { + elsif ( $self->{isTitle} ) { $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + $self->{doneTitle} = 1 ; } elsif ( $self->{isSerie} ) { $self->{curInfo}->{series} = $origtext; $self->{curInfo}->{series} =~s/^\s+//; + $self->{isSerie} = 0 ; + $self->{doneSerie} = 1 ; + $self->{isTabs} = 0 ; + } + elsif ( $self->{isWriter} ) { + $self->{curInfo}->{writer} = $origtext; + $self->{isWriter} = 0 ; + $self->{doneWriter} = 1 ; + } + elsif ( $self->{isIllustrator} ) { + $self->{curInfo}->{illustrator} = $origtext; + $self->{isIllustrator} = 0 ; + $self->{doneIllustrator} = 1 ; + } + elsif ( $self->{isColourist} ) { + $self->{curInfo}->{colourist} = $origtext; + $self->{isColourist} = 0 ; + $self->{doneColourist} = 1 ; + } + elsif ( $self->{isPublisher} ) { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + $self->{donePublisher} = 1 ; } - elsif ( ( $self->{isSynopsis} ) && ( ( $origtext =~ /Résumé de l'album :/ ) || ( $origtext =~ /Résumé de la série :/ ) ) ) { - $self->{startSynopsis} = 1; + elsif ( $self->{isISBN} ) { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0 ; + $self->{doneISBN} = 1 ; } - elsif ( ( $self->{isSynopsis} ) && ( $self->{startSynopsis} ) ) { - $self->{curInfo}->{synopsis} .= " ".$origtext; + elsif ( $self->{isNumberboards} ) { + $self->{curInfo}->{numberboards} = $origtext; + $self->{isNumberboards} = 0 ; + $self->{doneNumberboards} = 1 ; + } + elsif ( $self->{isVolume} ) { + $self->{curInfo}->{volume} = $origtext; + $self->{isVolume} = 0 ; + $self->{doneVolume} = 1 ; + } + elsif ( ( $self->{isLabel} ) && ( $origtext =~ m/Dépot légal/ ) && ( ! $self->{donePublishdate} ) ) { + $self->{isPublishdate} = 1 ; + $self->{isLabel} = 0 ; + } + elsif ( $self->{isPublishdate} ) { + $self->{curInfo}->{publishdate} = $origtext; + $self->{isPublishdate} = 0 ; + $self->{donePublishdate} = 1 ; + } + elsif ( ( $self->{isLabel} ) && ( $origtext =~ m/Estimation/ ) && ( ! $self->{doneCost} ) ) { + $self->{isCost} = 1 ; + $self->{isLabel} = 0 ; + } + elsif ( $self->{isCost} ) { + $self->{curInfo}->{cost} = $origtext; + $self->{isCost} = 0 ; + $self->{doneCost} = 1 ; + } + elsif ( ( $self->{isLabel} ) && ( $origtext =~ m/Format/ ) && ( ! $self->{doneFormat} ) ) { + $self->{isFormat} = 1 ; + $self->{isLabel} = 0 ; + } + elsif ( $self->{isFormat} ) { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0 ; + $self->{doneFormat} = 1 ; + } + elsif ( $self->{isSynopsis} ) { + $self->{curInfo}->{synopsis} = $origtext; $self->{curInfo}->{synopsis} =~ s/^(\s)*//; $self->{curInfo}->{synopsis} =~ s/(\s)*$//; + $self->{isSynopsis} = 0 ; + $self->{doneSynopsis} = 1 ; } } } @@ -359,9 +477,9 @@ use GCPlugins::GCcomics::GCcomicsCommon; $self->{isCollection} = 0; } } else { - if ( ( $self->{inTable} ) && ( $tagname eq "a" ) ) { + if ( ( $self->{inTable} ) && ( $tagname eq "span" ) ) { $self->{isTitle} = 0; - } elsif ( ( $self->{inTable} ) && ( $tagname eq "td" ) ) { + } elsif ( ( $self->{inTable} ) && ( $tagname eq "li" ) ) { $self->{isVolume} = 0; } } @@ -373,18 +491,20 @@ use GCPlugins::GCcomics::GCcomicsCommon; $self->{isIssue} = 0; $self->{isResultsTable} = 0; } - elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) { + elsif ( $tagname eq "label" ) { $self->{openlabel} = 0; + $self->{isLabel} = 0; } - elsif ( ( $self->{isTitle} ) && ( ( $tagname eq "div" ) || ( $tagname eq "h1" ) ) ) { - $self->{isTitle} = 0; - } - elsif ( ( $self->{isSerie} ) && ( $tagname eq "a" ) ) { - $self->{isSerie} = 0; - } - elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "em" ) && ( !$self->{startSynopsis} ) ) { - $self->{isSynopsis} = 0; - $self->{startSynopsis} = 0; + elsif ( $tagname eq "span" ) { + $self->{isColourist} = 0; + $self->{isIllustrator} = 0; + $self->{isISBN} = 0; + $self->{isNumberboards} = 0; + $self->{isPublisher} = 0; + $self->{isSerie} = 0; + $self->{isSynopsis} = 0; + $self->{isTitle} = 0; + $self->{isWriter} = 0; } elsif ( ( $self->{isSynopsis} ) && ( ( $tagname eq "p" ) || ( $tagname eq "br" ) ) && ( $self->{startSynopsis} ) ) { $self->{isSynopsis} = 0; @@ -395,4 +515,4 @@ use GCPlugins::GCcomics::GCcomicsCommon; } } -1; \ No newline at end of file +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm deleted file mode 100644 index 75c6854..0000000 --- a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm +++ /dev/null @@ -1,435 +0,0 @@ -package GCPlugins::GCfilms::GCAlpacineES; - -################################################### -# -# Copyright 2005-2010 Christian Jodar -# -# This file is part of GCstar. -# -# GCstar is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GCstar is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCstar; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -# -################################################### - -use strict; -use utf8; - -use GCPlugins::GCfilms::GCfilmsCommon; - -{ - package GCPlugins::GCfilms::GCPluginAlpacineES; - - use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); - - - # text - # Called each time some plain text (between tags) is processed. - # $origtext is the read text. - sub text - { - my ($self, $origtext) = @_; - - return if length($origtext) < 2; - - # Código para procesar el resultado de la busqueda - if ($self->{parsingList}){ - # Guardamos la fecha. - if ($self->{inside}->{li} && $self->{insideInfos}){ - $origtext =~ /. \(([0-9]{4})\)/; - $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext; - } - # Guardamos el título - if ($self->{inside}->{a} && $self->{insideInfos}){ - $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; - } - } - - else{ - # Eliminamos espacios iniciales, espacios dobles y espacios finales del texto - $origtext =~ s/^\s*|\s{2,}|\s*$//g; - # Estamos procesando el titulo - if ($self->{insideTitle}) - { - # Obtenemos titulo y fecha - $origtext =~ /(.*) \(([0-9]{4})\)/; - $self->{curInfo}->{title} = $1; - $self->{curInfo}->{date} = $2; - $self->{insideTitle} = 0; - return; - } - - # Si existe el hipervinculo "Ampliar" cambiamos la imagen por la ampliada - if ($self->{inside}->{a} && $origtext eq "Ampliar"){ - $self->{curInfo}->{image} =~ /(http:\/\/img.alpacine.com\/carteles\/.*)-[0-9]*(\.jpg)/; - $self->{curInfo}->{image} = $1 . $2; - return; - } - # Estamos en la puntuación real - if($self->{insideRating}){ - $self->{curInfo}->{ratingpress} = int( $origtext + 0.5 ); - $self->{insideRating} = 0; - } - # No hay puntuación real, asignamos 0 por defecto - if($self->{inside}->{div}){ - if($origtext =~ /Esperando \d votos/){ - $self->{curInfo}->{ratingpress} = 0; - } - } - # Procesamos el titulo original - if ($self->{isOrigTit} eq 1) { - $self->{isOrigTit} = 0; - $self->{curInfo}->{original} = $origtext; - return; - } - # Procesamos los generos (gen, gen, gen, gen...) - if ($self->{isGenres} eq 1) { - if($origtext ne ""){ - # hacemos uso de sus propias comas - $self->{curInfo}->{genre} .= $origtext; - } - else{ - $self->{isGenres} = 0; - } - return; - } - # Procesamos el país - if ($self->{isCountry} eq 1) { - $self->{isCountry} = 0; - $self->{curInfo}->{country} = $origtext; - return; - } - # Procesamos la duración - if ($self->{isTime} eq 1) { - $self->{isTime} = 0; - $self->{curInfo}->{time} = $origtext; - return; - } - # Procesamos los directores - if ($self->{isDirector} eq 1) { - if($origtext ne ""){ - if($self->{curInfo}->{director} eq ""){ - $self->{curInfo}->{director} .= $origtext; - } - else{ - $self->{curInfo}->{director} .= ", $origtext"; - } - } - else{ - $self->{isDirector} = 0; - } - return; - } - # Actores - if ($self->{isActors} eq 1) { - if($origtext ne ""){ - if($self->{curInfo}->{actors} eq ""){ - $self->{curInfo}->{actors} .= $origtext; - } - else{ - $self->{curInfo}->{actors} .= ", $origtext"; - } - } - else{ - $self->{isActors} = 0; - } - return; - } - # Procesamos la Sinopsis - if ($self->{isSynopsis} eq 1) { - $self->{isSynopsis} = 0; - $self->{curInfo}->{synopsis} = $origtext; - return; - } - # Procesamos los premios - if ($self->{isAwards} eq 1) { - $self->{isAwards} = 0; - $self->{curInfo}->{synopsis} = $self->{curInfo}->{synopsis}. "\n\nPremios:\n\t".$origtext; - $self->{insideInfos} = 0; - return; - } - - # Condiciones para procesar los campos en el siguiente ciclo - if($self->{insideInfos}){ - $self->{isOrigTit} = 1 if $origtext eq "Título original:"; - $self->{isGenres} = 1 if $origtext eq "Género:"; - $self->{isCountry} = 1 if $origtext eq "País:"; - $self->{isTime} = 1 if $origtext eq "Duración:"; - $self->{isDirector} = 1 if $origtext eq "Dirección:"; - $self->{isActors} = 1 if $origtext eq "Interpretación:"; - $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:"; - $self->{isAwards} = 1 if $origtext eq "Premios:"; - } - } - } - - - # end - # Called each time a HTML tag ends. - # $tagname is the tag name. - sub end - { - my ($self, $tagname) = @_; - $self->{inside}->{$tagname}--; - - # Código para procesar el resultado de la busqueda - #if ($self->{parsingList}){ - #} - # Código para procesar la información de la pelicula seleccionada - #else { - #} - } - - # In processing functions below, self->{parsingList} can be used. - # If true, we are processing a search results page - # If false, we are processing a item information page. - - # $self->{inside}->{tagname} (with correct value for tagname) can be used to test - # if we are in the corresponding tag. - - # You have a counter $self->{itemIdx} that have to be used when processing search results. - # It is your responsability to increment it! - - # When processing search results, you have to fill the available fields for results - # - # $self->{itemsList}[$self->{movieIdx}]->{field_name} - # - # When processing a movie page, you need to fill the fields (if available) - # in $self->{curInfo}. - # - # $self->{curInfo}->{field_name} - - # start - # Called each time a new HTML tag begins. - # $tagname is the tag name. - # $attr is reference to an associative array of tag attributes. - # $attrseq is an array reference containing all the attributes name. - # $origtext is the tag text as found in source file - # Returns nothing - sub start - { - my ($self, $tagname, $attr, $attrseq, $origtext) = @_; - $self->{inside}->{$tagname}++; - - # Código para procesar el resultado de la busqueda para generar el listado - if ($self->{parsingList}) - { - # Comprobamos si estamos dentro de un título utilizando el atributo class - if( ($tagname eq "li" ) && ($attr->{class} ne "mas" )){ - $self->{itemIdx}++; - $self->{insideInfos} = 1 ; - return; - } - if( ($tagname eq "li" ) && ($attr->{class} eq "mas" )){ - $self->{insideInfos} = 0; - return; - } - # Si estamos en un título y encontramos una tag a, es un enlace a ficha - if ($tagname eq "a" && $self->{insideInfos}){ - $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.alpacine.com".$attr->{href}; - return; - } - } - # Código para procesar la información de la pelicula seleccionada - else { - if ($tagname eq "h1"){ - $self->{insideTitle} = 1; - return; - } - # Si estamos dentro de una imagen y el src es el del thumb lo asignamos como imagen - if ($tagname eq "img") - { - # Extraemos la dirección de la imagen thumb - if ($attr->{src} =~ /http:\/\/img.alpacine.com\/carteles\/.*\.jpg/) - { - $self->{curInfo}->{image} = $attr->{src}; - } - return; - } - - if ($tagname eq "div" && $attr->{class} eq "voto"){ - $self->{insideRating} = 1; - return; - } - - if( $tagname eq "div" && $attr->{class} eq "datos" ){ - $self->{insideInfos} = 1 ; - return; - } - } - } - - # preProcess - # Called before each page is processed. You can use it to do some substitutions. - # $html is the page content. - # Returns modified version of page content. - sub preProcess - { - my ($self, $html) = @_; - - # Anulamos el html si coincide con el patron de no resultados - if($html =~ /^.*No hay resultados para.*$/s){ - $html = ""; - return $html; - } - - # Recorta el código del listado de resultados, quedandose solo con la parte que nos interesa del html - # el modificador s/.../$1/s trata el flujo como una sola cadena y reemplaza todo el cuerpo con la parte que nos interesa - if($html =~ s/^.*
Pel.culas \([0-9]* resultado[s]?\)<\/span><\/div>