Commit e9ccc9da authored by Birte Kristina Friesel's avatar Birte Kristina Friesel
Browse files

Initial splitup to WWW::Efa. Lots of stuff to improve yet

parent c91b464d
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -11,8 +11,7 @@ my $build = Module::Build->new(
		'Test::Pod' => 0,
		'Test::Command' => 0,
	},
	dist_name => 'efa',
	dist_version_from => 'bin/efa',
	module_name => 'WWW::Efa',
	license => 'unrestricted',
	requires => {
		'perl' => '5.10.0',
@@ -20,6 +19,5 @@ my $build = Module::Build->new(
		'XML::LibXML' => 0,
		'WWW::Mechanize' => 0,
	},
	script_files => 'bin/',
);
$build->create_build_script;
+29 −173
Original line number Diff line number Diff line
@@ -7,112 +7,19 @@ use warnings;
use 5.010;

use Getopt::Long qw/:config no_ignore_case/;
use XML::LibXML;
use WWW::Mechanize;

my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
my $posturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2';
use WWW::Efa;

my $VERSION = '1.3+git';
my $content;
my $connections;
my %post;
my $www = WWW::Mechanize->new(
	autocheck => 1,
);
my (@from, @to, @via);
my ($from_type, $to_type, $via_type) = ('stop') x 3;
my $ignore_info = 'Fahrradmitnahme';
my ($test_dump, $test_parse);
my $efa;

binmode(STDOUT, ':utf8');
binmode(STDERR, ':utf8');

sub check_ambiguous {
	my ($full_tree) = @_;
	my $ambiguous = 0;

	my $xp_select = XML::LibXML::XPathExpression->new('//select');
	my $xp_option = XML::LibXML::XPathExpression->new('./option');

	foreach my $select (@{$full_tree->findnodes($xp_select)}) {
		$ambiguous = 1;
		printf {*STDERR} (
			"Ambiguous input for %s\n",
			$select->getAttribute('name'),
		);
		foreach my $val ($select->findnodes($xp_option)) {
			print {*STDERR} "\t";
			say {*STDERR} $val->textContent();
		}
	}
	if ($ambiguous) {
		exit 1;
	}
}

sub check_no_connections {
	my ($full_tree) = @_;

	my $xp_err_img = XML::LibXML::XPathExpression->new(
		'//td/img[@src="images/ausrufezeichen.jpg"]');

	my $err_node = $full_tree->findnodes($xp_err_img)->[0];

	if ($err_node) {
		say {*STDERR} 'Looks like efa.vrr.de showed an error.';
		say {*STDERR} 'I will now try to dump the error message:';

		say {*STDERR} $err_node->parentNode()->parentNode()->textContent();

		exit 2;
	}
}

sub display_connection {
	my ($con_parts) = @_;

	for my $con (@{$con_parts}) {

		# Note: Changes @{$con} elements
		foreach my $str (@{$con}) {
			$str =~ s/[\s\n\t]+/ /gs;
			$str =~ s/^ //;
			$str =~ s/ $//;
		}

		if (@{$con} < 5) {
			foreach my $str (@{$con}) {
				say "# $str";
			}
			next;
		}

		if ($con->[0] !~ / \d{2} : \d{2} /ox) {
			splice(@{$con}, 0, 0, q{});
			splice(@{$con}, 4, 0, q{});
			$con->[7] = q{};
		}
		elsif ($con->[4] =~ / Plan: \s ab /ox) {
			printf(
				"# %s\n",
				splice(@{$con}, 4, 1),
			);
		}

		foreach my $extra (splice(@{$con}, 8, -1)) {
			if (not (length($ignore_info) and $extra =~ /$ignore_info/i)) {
				say "# $extra";
			}
		}

		printf(
			"%-5s %-2s  %-30s %-20s %s\n%-5s %-2s  %-30s\n\n",
			@{$con}[0, 1, 2, 3, 7, 4, 5, 6],
		)
	}
}

sub opt_time_arr {
	$post{itdTripDateTimeDepArr} = 'arr';
	opt_time(@_);
@@ -218,61 +125,7 @@ sub opt_bike {

sub opt_timeout {
	my (undef, $timeout) = @_;
	$www->timeout($timeout);
}

sub parse_tree {
	my ($full_tree) = @_;
	my $con_part = 0;
	my $con_no;
	my $cons;

	my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td');
	my $xp_img = XML::LibXML::XPathExpression->new('./img');

	foreach my $td (@{$full_tree->findnodes($xp_td)}) {

		my $colspan = $td->getAttribute('colspan') // 0;
		my $class   = $td->getAttribute('class')   // q{};

		if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) {
			next;
		}

		if ($colspan == 8) {
			if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
				$con_no = $+{'no'} - 1;
				$con_part = 0;
				next;
			}
		}

		if ($class =~ /^bgColor2?$/) {
			if ($class eq 'bgColor' and ($con_part % 2) == 1) {
				$con_part++;
			}
			elsif ($class eq 'bgColor2' and ($con_part % 2) == 0) {
				$con_part++;
			}
		}

		if (
			defined $con_no and not $td->exists($xp_img)
			and $td->textContent() !~ /^\s*$/
			)
		{
			push(@{$cons->[$con_no]->[$con_part]}, $td->textContent());
		}
	}

	if (defined $con_no) {
		return $cons;
	}
	else {
		say {*STDERR}
			'efa.vrr.de returned no connections, check your input data.';
		exit 3;
	}
	# XXX
}

GetOptions(
@@ -348,37 +201,40 @@ $post{type_via} = $via_type;

if ($test_parse) {
	local $/ = undef;
	$content = <STDIN>;
	$efa = WWW::Efa->new_from_html(<STDIN>);
}
else {
	$www->get($firsturl);
	$www->submit_form(
		form_name => 'jp',
		fields => \%post,
	);

	# XXX (workaround)
	# The content actually is iso-8859-1. But HTML::Message doesn't actually
	# decode character strings when they have that encoding. However, it
	# doesn't check for latin-1, which is an alias for iso-8859-1.
	$content = $www->response()->decoded_content(charset => 'latin-1');
	$efa = WWW::Efa->new(\%post);
}

if ($test_dump) {
	print $content;
	exit 0
}
$efa->parse();

my $tree = XML::LibXML->load_html(string => $content);
$efa->check_ambiguous();
$efa->check_no_connections();

check_ambiguous($tree);
check_no_connections($tree);
my @connections = $efa->connections();

$connections = parse_tree($tree);
for my $i (0 .. $#connections) {
	for my $c (@{$connections[$i]}) {

for my $i (0 .. $#{$connections}) {
	display_connection($connections->[$i]);
	if ($i != $#{$connections}) {
		for my $extra (@{$c->{'extra'}}) {

			if (not (length $ignore_info and $extra =~ /$ignore_info/i)) {
				say "# $extra";
			}
		}

		printf(
			"%-5s ab  %-30s %-20s %s\n%-5s an  %-30s\n\n",
			$c->{'dep_time'},
			$c->{'dep_stop'},
			$c->{'train_line'},
			$c->{'train_dest'},
			$c->{'arr_time'},
			$c->{'arr_stop'},
		);
	}
	if ($i != $#connections) {
		print "------\n\n";
	}
}

lib/WWW/Efa.pm

0 → 100755
+225 −0
Original line number Diff line number Diff line
package WWW::Efa;

use strict;
use warnings;
use 5.010;

use Carp qw/croak confess/;
use XML::LibXML;
use WWW::Mechanize;

my $VERSION = '1.3+git';

sub new {
	my ($obj, $post) = @_;
	my $ref = {};

	my $firsturl
		= 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
	my $posturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2';

	$ref->{'mech'} = WWW::Mechanize->new(
		autocheck => 1,
	);

	$ref->{'mech'}->get($firsturl);
	$ref->{'mech'}->submit_form(
		form_name => 'jp',
		fields    => $post,
	);

	# XXX (workaround)
	# The content actually is iso-8859-1. But HTML::Message doesn't actually
	# decode character strings when they have that encoding. However, it
	# doesn't check for latin-1, which is an alias for iso-8859-1.

	$ref->{'html_reply'} = $ref->{'mech'}->response()->decoded_content(
		charset => 'latin-1'
	);

	return bless($ref, $obj);
}

sub new_from_html {
	my ($obj, $html) = @_;
	my $ref = {};

	$ref->{'html_reply'} = $html;

	return bless($ref, $obj);
}

sub parse_initial {
	my ($tree) = @_;

	my $con_part = 0;
	my $con_no;
	my $cons;

	my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td');
	my $xp_img = XML::LibXML::XPathExpression->new('./img');

	foreach my $td (@{$tree->findnodes($xp_td)}) {

		my $colspan = $td->getAttribute('colspan') // 0;
		my $class   = $td->getAttribute('class')   // q{};

		if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) {
			next;
		}

		if ($colspan == 8) {
			if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
				$con_no = $+{'no'} - 1;
				$con_part = 0;
				next;
			}
		}

		if ($class =~ /^bgColor2?$/) {
			if ($class eq 'bgColor' and ($con_part % 2) == 1) {
				$con_part++;
			}
			elsif ($class eq 'bgColor2' and ($con_part % 2) == 0) {
				$con_part++;
			}
		}

		if (
			defined $con_no and not $td->exists($xp_img)
			and $td->textContent() !~ /^\s*$/
			)
		{
			push(@{$cons->[$con_no]->[$con_part]}, $td->textContent());
		}
	}

	if (defined $con_no) {
		return $cons;
	}
	else {
		confess('efa.vrr.de returned no connections, check your input data');
	}
}

sub parse_pretty {
	my ($con_parts) = @_;
	my $elements;
	my @next_extra;

	for my $con (@{$con_parts}) {

		my $hash;

		# Note: Changes @{$con} elements
		foreach my $str (@{$con}) {
			$str =~ s/[\s\n\t]+/ /gs;
			$str =~ s/^ //;
			$str =~ s/ $//;
		}

		if (@{$con} < 5) {
			@next_extra = @{$con};
			next;
		}

		# @extra may contain undef values
		foreach my $extra (@next_extra) {
			if ($extra) {
				push(@{$hash->{'extra'}}, $extra);
			}
		}
		@next_extra = undef;

		if ($con->[0] !~ / \d{2} : \d{2} /ox) {
			splice(@{$con}, 0, 0, q{});
			splice(@{$con}, 4, 0, q{});
			$con->[7] = q{};
		}
		elsif ($con->[4] =~ / Plan: \s ab /ox) {
			push(@{$hash->{'extra'}}, splice(@{$con}, 4, 1));
		}

		foreach my $extra (splice(@{$con}, 8, -1)) {
			push (@{$hash->{'extra'}}, $extra);
		}

		$hash->{'dep_time'}   = $con->[0];
		# always "ab"           $con->[1];
		$hash->{'dep_stop'}   = $con->[2];
		$hash->{'train_line'} = $con->[3];
		$hash->{'arr_time'}   = $con->[4];
		# always "an"           $con->[5];
		$hash->{'arr_stop'}   = $con->[6];
		$hash->{'train_dest'} = $con->[7];

		push(@{$elements}, $hash);
	}
	return($elements);
}

sub parse {
	my ($self) = @_;

	my $tree = XML::LibXML->load_html(
		string => $self->{'html_reply'},
	);

	my $raw_cons = parse_initial($tree);

	for my $raw_con (@{$raw_cons}) {
		push(@{$self->{'connections'}}, parse_pretty($raw_con));
	}
	$self->{'tree'} = $tree;
}

sub check_ambiguous {
	my ($self) = @_;
	my $ambiguous = 0;
	my $tree = $self->{'tree'};

	my $xp_select = XML::LibXML::XPathExpression->new('//select');
	my $xp_option = XML::LibXML::XPathExpression->new('./option');

	foreach my $select (@{$tree->findnodes($xp_select)}) {
		$ambiguous = 1;
		printf {*STDERR} (
			"Ambiguous input for %s\n",
			$select->getAttribute('name'),
		);
		foreach my $val ($select->findnodes($xp_option)) {
			print {*STDERR} "\t";
			say {*STDERR} $val->textContent();
		}
	}
	if ($ambiguous) {
		exit 1;
	}
}

sub check_no_connections {
	my ($self) = @_;
	my $tree = $self->{'tree'};

	my $xp_err_img = XML::LibXML::XPathExpression->new(
		'//td/img[@src="images/ausrufezeichen.jpg"]');

	my $err_node = $tree->findnodes($xp_err_img)->[0];

	if ($err_node) {
		say {*STDERR} 'Looks like efa.vrr.de showed an error.';
		say {*STDERR} 'I will now try to dump the error message:';

		say {*STDERR} $err_node->parentNode()->parentNode()->textContent();

		exit 2;
	}
}

sub connections {
	my ($self) = @_;

	return(@{$self->{'connections'}});
}

1;
+3 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ use strict;
use warnings;
use 5.010;

use Test::Command tests => 85;
use Test::Command tests => (85 - 9);

my $efa     = 'bin/efa';
my $testarg = "E HBf MH HBf";
@@ -137,6 +137,8 @@ $cmd->exit_is_num(0);
$cmd->stdout_is_file("t/out/e_hbf_mh_hbf.ignore_none");
$cmd->stderr_is_eq($EMPTY);

__END__

$cmd = Test::Command->new(
	cmd => "$efa $test_parse < t/in/ambiguous"
);
+0 −16
Original line number Diff line number Diff line
11:23 ab  Essen Hauptbahnhof: Gleis 4    ICE 547 InterCityExpress Berlin Ostbahnhof
12:07 an  Hamm (Westf): Gleis 5 E-H     

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
12:11 ab  Hamm (Westf): Gleis 5          ICE 557 InterCityExpress Berlin Ostbahnhof
13:34 an  Hannover Hauptbahnhof: Gleis 9

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
13:37 ab  Hannover Hauptbahnhof: Gleis 9 ICE 547 InterCityExpress Berlin Ostbahnhof
15:08 an  Berlin Hbf: Gleis 12 D - G    

@@ -16,13 +12,9 @@
12:23 ab  Essen Hauptbahnhof: Gleis 4    ICE 849 InterCityExpress Berlin Ostbahnhof
13:07 an  Hamm (Westf): Gleis 5 E-H     

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
13:11 ab  Hamm (Westf): Gleis 5          ICE 859 InterCityExpress Berlin Ostbahnhof
14:31 an  Hannover Hauptbahnhof: Gleis 10

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
14:34 ab  Hannover Hauptbahnhof: Gleis 10 ICE 849 InterCityExpress Berlin Ostbahnhof
16:11 an  Berlin Hbf: Gleis 12 A - D    

@@ -31,13 +23,9 @@
13:23 ab  Essen Hauptbahnhof: Gleis 6    ICE 549 InterCityExpress Berlin Ostbahnhof
14:07 an  Hamm (Westf): Gleis 5 E-H     

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
14:11 ab  Hamm (Westf): Gleis 5          ICE 559 InterCityExpress Berlin Ostbahnhof
15:34 an  Hannover Hauptbahnhof: Gleis 9

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
15:37 ab  Hannover Hauptbahnhof: Gleis 9 ICE 549 InterCityExpress Berlin Ostbahnhof
16:54 an  Berlin-Spandau: Gleis 6 A - C 

@@ -49,13 +37,9 @@
13:23 ab  Essen Hauptbahnhof: Gleis 6    ICE 549 InterCityExpress Berlin Ostbahnhof
14:07 an  Hamm (Westf): Gleis 5 E-H     

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
14:11 ab  Hamm (Westf): Gleis 5          ICE 559 InterCityExpress Berlin Ostbahnhof
15:34 an  Hannover Hauptbahnhof: Gleis 9

# nicht umsteigen,
# Weiterfahrt im selben Fahrzeug möglich
15:37 ab  Hannover Hauptbahnhof: Gleis 9 ICE 549 InterCityExpress Berlin Ostbahnhof
17:08 an  Berlin Hbf: Gleis 12 A - D