Commit 4339f0db authored by Birte Kristina Friesel
Browse files

use Bahn OpenData site for DS100 station list

parent e527672c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ Module::Build->new(
		'Test::Fatal' => 0,
		'Test::More' => 0,
		'Test::Pod' => 0,
		'Text::CSV' => 0,
	},
	configure_requires => {
		'Module::Build' => 0.40,
+693 −702

File changed.

Preview size limit exceeded, changes collapsed.

+17 −72
Original line number Diff line number Diff line
@@ -3,9 +3,8 @@
use strict;
use warnings;
use 5.010;
use Encode qw(decode encode);
use List::Util qw(max sum);
use List::MoreUtils qw(true);
use Encode qw(encode);
use Text::CSV;

say <<'EOF';
package Travel::Status::DE::IRIS::Stations;
@@ -22,87 +21,33 @@ use Text::LevenshteinXS qw(distance);

# TODO switch to Text::Levenshtein::XS once AUR/Debian packages become available

our $VERSION = '1.02';
our $VERSION = '1.04';

my @stations = (
EOF

# NOTE(review): this region comes from a diff view ("+17 -72") in which the
# removed column-detection code (process_block) and the added Text::CSV
# reading loop are shown interleaved without +/- markers. The deeper-indented
# lines working on @buf, @histogram and @borders appear to be the removed
# implementation; the lines using $csv and @fields appear to be the added one.
my @buf;

# Removed implementation: find column borders in the lines buffered in @buf
# and cut each line into alternating station / name columns.
sub process_block {
	my @histogram;
	my @borders = (0);
	my $run = 0;

	my $length = max (map { length($_) } @buf);
# Added implementation: read semicolon-separated CSV from STDIN, one record
# per input line.
my $csv = Text::CSV->new({binary => 1, sep_char => q{;}});
while (my $line = <STDIN>) {
# NOTE(review): the input is no longer decoded here, but the fields are still
# passed through encode('UTF-8', ...) when printed below -- confirm that
# non-ASCII names are not encoded twice.
#	chomp $line;
#	$line = decode('UTF-8', $line);

	# $histogram[$i] counts the buffered lines that are shorter than $i or
	# have a space at position $i.
	for my $i (0 .. $length) {
		$histogram[$i] = true { length($_) < $i or substr($_, $i, 1) eq q{ } } @buf;
	# NOTE(review): $status is assigned but never checked in the visible code;
	# a line that fails to parse still reaches the field handling below.
	my $status = $csv->parse($line);
	my @fields = $csv->fields;

		# A position where every buffered line matched starts a new border,
		# but only the first position of each such run is recorded.
		if ($histogram[$i] == @buf) {
			if (not $run) {
				push(@borders, $i);
				$run = 1;
			}
		}
		else {
			$run = 0;
		}
	}
	# Borders are consumed in pairs: an even index starts a station column,
	# the following odd index starts the matching name column.
	for my $i (0 .. $#borders / 2) {
		for my $line (@buf) {
			my $station_offset = $borders[2 * $i];
			my $name_offset = $borders[2 * $i + 1];
			my $station_length = $name_offset - $station_offset;
			# Without a following border the name column runs to end of line.
			my $name_length = $borders[2 * $i + 2] ? ($borders[2 * $i + 2] - $name_offset) : undef;

			if (length($line) < $station_offset) {
	# Skip rows whose first column is 'Abk' (presumably the CSV header row).
	if ($fields[0] eq 'Abk') {
		next;
	}

			my $station = substr($line, $station_offset, $station_length);
			my $name = $name_length ? substr($line, $name_offset, $name_length) : substr($line, $name_offset);
	# Only $station and $name are used below; the remaining columns are
	# unpacked but ignored.
	my ($station, $name, $country, $location, $valid_since) = @fields;

			$station =~ s{^\s+}{};
			$station =~ s{\s+$}{};
			$station =~ s{\s+}{ }g;
	# Turn '!' into spaces, trim and collapse whitespace, then escape single
	# quotes so the name can sit inside the single-quoted string printed below.
	# NOTE(review): $station gets no quote escaping, and backslashes in $name
	# are not escaped -- confirm the input never contains them.
	$name =~ s{!}{ }g;
	$name =~ s{^\s+}{};
	$name =~ s{\s+$}{};
	$name =~ s{\s+}{ }g;
	$name =~ s{'}{\\'}g;

			if (length($station) == 0) {
				next;
			}

	# Emit one ['station','name'] entry of the generated @stations list.
	printf("\t['%s','%s'],\n", encode('UTF-8', $station), encode('UTF-8', $name));
}
	}
}

# NOTE(review): in the "+17 -72" diff view this loop appears to belong to the
# removed implementation. It reads decoded lines from STDIN, collects them in
# @buf and hands each block to process_block() when an empty line is reached.
while (my $line = <STDIN>) {
	chomp $line;
	$line = decode('UTF-8', $line);

	# An empty line ends the current block: process it and start a new one.
	if (length($line) == 0 and @buf) {
		process_block();
		@buf = ();
	}

	# Keep only lines that start with two uppercase letters or contain a
	# whitespace-delimited run of 2 to 5 uppercase letters.
	if ($line !~ m{ ^ [A-Z]{2} }x and $line !~ m{ \s [A-Z]{2,5} \s }x) {
		next;
	}

	# Replace the spaces in "RB-Gr km" (plus the whitespace after it) and
	# after "Bad" with '!' -- presumably so that the column detection in
	# process_block() does not treat them as column gaps.
	$line =~ s{RB-Gr km}{RB-Gr!km}g;
	$line =~ s{RB-Gr!km\s++}{RB-Gr!km!}g;
	$line =~ s{Bad }{Bad!}g;

	push(@buf, $line);
}
# Process the last block if the input did not end with an empty line.
if (@buf) {
	process_block();
}

say <<'EOF';
);
+2 −2
Original line number Diff line number Diff line
#!/bin/sh
# Regenerate lib/Travel/Status/DE/IRIS/Stations.pm: download the station list
# and pipe it through scripts/acronyms.pl.
# NOTE(review): this is a diff view ("+2 -2"); the betriebsstellen.pdf /
# pdftotext pair and the CSV / plain perl pair below appear to be the removed
# and the added variant of the same pipeline.
# NOTE(review): curl runs with -s only and over plain http, so a failed
# download may go unnoticed while the output file is still overwritten --
# consider adding -f.

curl -s http://fahrweg.dbnetze.com/file/fahrweg-de/2394144/vHBDX5OndmGwv-JTA9EzuNArX1E/2361656/data/betriebsstellen.pdf \
| pdftotext -layout - - | perl scripts/acronyms.pl \
curl -s http://data.deutschebahn.com/datasets/betriebsstellen/DBNetz-Betriebsstellenverzeichnis-Stand2015-05.csv \
| perl scripts/acronyms.pl \
> lib/Travel/Status/DE/IRIS/Stations.pm