Skip to content
Snippets Groups Projects
Commit 04f76c53 authored by Birte Kristina Friesel's avatar Birte Kristina Friesel
Browse files

Use XML::LibXML directly

parent dde575a1
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ use warnings; ...@@ -7,7 +7,7 @@ use warnings;
use 5.010; use 5.010;
use Getopt::Long qw/:config no_ignore_case/; use Getopt::Long qw/:config no_ignore_case/;
use HTML::TreeBuilder::LibXML; use XML::LibXML;
use WWW::Mechanize; use WWW::Mechanize;
my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr'; my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
...@@ -31,15 +31,18 @@ sub check_ambiguous { ...@@ -31,15 +31,18 @@ sub check_ambiguous {
my ($full_tree) = @_; my ($full_tree) = @_;
my $ambiguous = 0; my $ambiguous = 0;
foreach my $select (@{$full_tree->findnodes('//select')}) { my $xp_select = XML::LibXML::XPathExpression->new('//select');
my $xp_option = XML::LibXML::XPathExpression->new('./option');
foreach my $select (@{$full_tree->findnodes($xp_select)}) {
$ambiguous = 1; $ambiguous = 1;
printf( printf(
"Ambiguous input for %s\n", "Ambiguous input for %s\n",
$select->attr('name'), $select->getAttribute('name'),
); );
foreach my $val ($select->findnodes('./option')) { foreach my $val ($select->findnodes($xp_option)) {
print "\t"; print "\t";
say $val->as_trimmed_text(); say $val->textContent();
} }
} }
if ($ambiguous) { if ($ambiguous) {
...@@ -52,6 +55,13 @@ sub display_connection { ...@@ -52,6 +55,13 @@ sub display_connection {
for my $con (@{$con_parts}) { for my $con (@{$con_parts}) {
# Note: Modifies @{$con} elements in place
foreach my $str (@{$con}) {
$str =~ s/[\s\n\t]+/ /gs;
$str =~ s/^ //;
$str =~ s/ $//;
}
if (@{$con} < 5) { if (@{$con} < 5) {
foreach my $str (@{$con}) { foreach my $str (@{$con}) {
say "# $str"; say "# $str";
...@@ -198,19 +208,20 @@ sub parse_tree { ...@@ -198,19 +208,20 @@ sub parse_tree {
my $con_no = 0; my $con_no = 0;
my $cons; my $cons;
foreach my $td (@{$full_tree->findnodes('//table//table/tr/td')}) { my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td');
my $xp_img = XML::LibXML::XPathExpression->new('./img');
foreach my $td (@{$full_tree->findnodes($xp_td)}) {
my $colspan = $td->attr('colspan') // 0; my $colspan = $td->getAttribute('colspan') // 0;
my $class = $td->attr('class') // q{}; my $class = $td->getAttribute('class') // q{};
# Putting these into the XPath expression would lead to noticeable (1
# to 2 seconds) performance penalties
if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) { if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) {
next; next;
} }
if ($colspan == 8) { if ($colspan == 8) {
if ($td->as_trimmed_text() =~ / (?<no> \d+ ) \. .+ Fahrt /x) { if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
$con_no = $+{'no'} - 1; $con_no = $+{'no'} - 1;
$con_part = 0; $con_part = 0;
next; next;
...@@ -226,8 +237,8 @@ sub parse_tree { ...@@ -226,8 +237,8 @@ sub parse_tree {
} }
} }
if (not @{$td->findnodes('./img')} and $td->as_text() !~ /^\s*$/) { if (not @{$td->findnodes($xp_img)} and $td->textContent() !~ /^\s*$/) {
push(@{$cons->[$con_no]->[$con_part]}, $td->as_trimmed_text()); push(@{$cons->[$con_no]->[$con_part]}, $td->textContent());
} }
} }
return $cons; return $cons;
...@@ -326,9 +337,7 @@ if ($test_dump) { ...@@ -326,9 +337,7 @@ if ($test_dump) {
exit 0 exit 0
} }
my $tree = HTML::TreeBuilder::LibXML->new(); my $tree = XML::LibXML->load_html(string => $content);
$tree->parse($content);
$tree->eof();
check_ambiguous($tree); check_ambiguous($tree);
......
Ambiguous input for name_origin Ambiguous input for name_origin
Bredeney Bredeney
Bredeney Friedhof Bredeney Friedhof
Bredeneyer Kreuz Bredeneyer Kreuz
Ambiguous input for name_destination Ambiguous input for name_destination
Werden Brücke Werden Brücke
Werden S Werden S
Werdener Markt Werdener Markt
Werdener Str. Werdener Str.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment