Commit 5f1502c0 authored by Birte Kristina Friesel's avatar Birte Kristina Friesel
Browse files

Update Stations.pm and acronyms.{pl,sh}. Fix 9 code/name combos.

parent ad0697d9
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2,6 +2,9 @@ git HEAD

    * IRIS->new: Fix default lwp_options value (was documented, but not used)
    * Result->route_interesting: Also consider airports
    * Stations: Update DS100 code list. Fixes 9 stations in Köln, Düsseldorf
      and Wuppertal which had a wrong code/name combination assigned due to
      a parser error

Travel::Status::DE::IRIS 1.00 - Fri May 01 2015

+14 −9
Original line number Diff line number Diff line
@@ -1206,6 +1206,7 @@ my @stations = (
	[ 'BBST',  'Bestensee' ],
	[ 'EBWG',  'Bestwig' ],
	[ 'DNOB',  'Betonwerk Oeton' ],
	[ 'FKBH1', 'Betrh Sandh Str' ],
	[ 'XLXBB', 'Bettembg Gr FR' ],
	[ 'XLB',   'Bettembourg' ],
	[ 'NBMS',  'Bettmannsäge' ],
@@ -2641,16 +2642,16 @@ my @stations = (
	[ 'XPCWA', 'Czerwonka' ],
	[ 'XPCS',  'Czestoch Stradom' ],
	[ 'XPC',   'Czestochowa Osob' ],
	[ 'KDA D', 'Abstellbf' ],
	[ 'KDA',   'D Abstellbf' ],
	[ 'KDFF',  'D Flughafen' ],
	[ 'KDFFH', 'D Flughafen Hp' ],
	[ 'KDF D', 'Flughafen Term' ],
	[ 'KDF',   'D Flughafen Term' ],
	[ 'KDF A', 'D Flughf W 884' ],
	[ 'KDH D', 'Hafen' ],
	[ 'KDH',   'D Hafen' ],
	[ 'KDU K', 'D Karthäuser Weg' ],
	[ 'KDVS',  'D Völklinger Str' ],
	[ 'KDV D', 'Volksgarten' ],
	[ 'KDW D', 'Wehrhahn' ],
	[ 'KDV',   'D Volksgarten' ],
	[ 'KDW',   'D Wehrhahn' ],
	[ 'FDAL',  'Da Lichtwiese' ],
	[ 'EDAD',  'Daaden' ],
	[ 'FDA',   'Da-Arheilgen' ],
@@ -4758,6 +4759,7 @@ my @stations = (
	[ 'SGB',   'Göttelborn' ],
	[ 'DGOE',  'Göttengrün-Gef' ],
	[ 'RGH',   'Gottenheim' ],
	[ 'RGHG',  'DB/SWEG' ],
	[ 'NGZ',   'Gotteszell' ],
	[ 'HG',    'Göttingen' ],
	[ 'HG G',  'Göttingen Gbf' ],
@@ -6431,7 +6433,7 @@ my @stations = (
	[ 'XRJU',  'Jurdani' ],
	[ 'XFJU',  'Juvisy' ],
	[ 'KKBP',  'K Barbarossapl' ],
	[ 'KBP K', 'Businesspark' ],
	[ 'KBP',   'K Businesspark' ],
	[ 'KKE N', 'K Eifelt Bez II' ],
	[ 'KKE M', 'K Eifelt Bez III' ],
	[ 'KKE S', 'K Eifelt Bez IV' ],
@@ -6439,7 +6441,7 @@ my @stations = (
	[ 'KKE F', 'K Eifeltor Emf' ],
	[ 'KKE B', 'K Eifeltor Enf' ],
	[ 'KKE K', 'K Eifeltor Esf' ],
	[ 'KKP K', 'Geldernstr/Pa' ],
	[ 'KKP',   'K Geldernstr/Pa' ],
	[ 'KKHRW', 'K Hansar Wendean' ],
	[ 'KKLP',  'K Klettenbergp' ],
	[ 'AKSM',  'K Seefischmarkt' ],
@@ -9434,6 +9436,7 @@ my @stations = (
	[ 'TNST',  'Neuenstadt/Koch.' ],
	[ 'TNN',   'Neuenstein' ],
	[ 'KNEW',  'Neuer Weg' ],
	[ 'KQNE',  'NRW/RP' ],
	[ 'MNF',   'Neufahrn b Frei' ],
	[ 'MNFR',  'Neufahrn/Nbay' ],
	[ 'XBNC',  'Neufchateau' ],
@@ -14172,8 +14175,8 @@ my @stations = (
	[ 'XCVY',  'Vyrica' ],
	[ 'XTVP',  'Vysoka Pec' ],
	[ 'HWRG',  'W Böhme DB-Gr' ],
	[ 'KWR W', 'Rauenthal' ],
	[ 'KWZ W', 'Zool Garten' ],
	[ 'KWR',   'W Rauenthal' ],
	[ 'KWZ',   'W Zool Garten' ],
	[ 'EWFR',  'Wa Unser Fritz' ],
	[ 'FWAB',  'Wabern (Bz Ksl)' ],
	[ 'HWAD',  'Wachendorf' ],
@@ -14719,6 +14722,7 @@ my @stations = (
	[ 'SWIN',  'Wincheringen' ],
	[ 'EWIL',  'Windelsbleiche' ],
	[ 'RWND',  'Winden (Pfalz)' ],
	[ 'FQWH',  'NRW/RP' ],
	[ 'HWDH',  'Windheim (Weser)' ],
	[ 'NWB',   'Windischeschenb' ],
	[ 'XAWIG', 'Windischgarsten' ],
@@ -14894,6 +14898,7 @@ my @stations = (
	[ 'UWFN',  'Wünschendorf N' ],
	[ 'BWUE',  'Wünsdorf-Waldst' ],
	[ 'NWR A', 'Wür Rbf Ausfahrt' ],
	[ 'HQWS',  'NRW/HE' ],
	[ 'EWGD',  'Würgendorf' ],
	[ 'EWGT',  'Würgendorf Ort' ],
	[ 'NWHO',  'Wür-Heidingsf O' ],
+73 −12
Original line number Diff line number Diff line
@@ -3,14 +3,9 @@
use strict;
use warnings;
use 5.010;

my $re_line = qr{
	^
	(?<acronym> [A-Z]{2}[A-Z ]{0,3} )
	\s
	(?<name> .+)
	$
}x;
use Encode qw(decode encode);
use List::Util qw(max sum);
use List::MoreUtils qw(true);

say <<'EOF';
package Travel::Status::DE::IRIS::Stations;
@@ -27,15 +22,81 @@ our $VERSION = '1.00';
my @stations = (
EOF

my @buf;

# Split the lines buffered in the file-level @buf into station code / name
# columns and print one "\t['CODE','Name'],\n" entry per pair to STDOUT.
#
# Column boundaries are detected per block: a character column counts as
# blank when every buffered line either has a space there or is shorter than
# the column index. The first column of each blank run is recorded as a
# border, after the initial border at column 0. Borders are then consumed in
# pairs (code start, name start); the border following a pair, if there is
# one, ends the name field, otherwise the name runs to the end of the line.
#
# Takes no arguments and has no meaningful return value. The caller fills
# @buf before the call and clears it afterwards.
sub process_block {
	# Nothing buffered, nothing to emit (max() of an empty list is undef).
	return if not @buf;

	my @borders = (0);
	my $in_blank_run = 0;

	my $length = max (map { length($_) } @buf);

	for my $i (0 .. $length) {
		# Number of lines that are blank at column $i. A line whose length is
		# exactly $i is not counted: substr() yields an empty string there,
		# which is not equal to a space.
		my $blank_count = true { length($_) < $i or substr($_, $i, 1) eq q{ } } @buf;

		if ($blank_count == @buf) {
			if (not $in_blank_run) {
				push(@borders, $i);
				$in_blank_run = 1;
			}
		}
		else {
			$in_blank_run = 0;
		}
	}

	# Only complete (code start, name start) pairs can be turned into entries.
	# A trailing unpaired border has no name column; using it would mean
	# computing offsets from undef and could emit a bogus entry.
	my $pair_count = int(@borders / 2);

	for my $i (0 .. $pair_count - 1) {
		my $station_offset = $borders[2 * $i];
		my $name_offset = $borders[2 * $i + 1];
		my $next_offset = $borders[2 * $i + 2];
		my $station_length = $name_offset - $station_offset;

		for my $line (@buf) {
			if (length($line) < $station_offset) {
				next;
			}

			my $station = substr($line, $station_offset, $station_length);

			# A line may end before the name column starts; treat the name as
			# empty instead of calling substr() outside of the string.
			my $name = q{};
			if (length($line) >= $name_offset) {
				$name = defined $next_offset
					? substr($line, $name_offset, $next_offset - $name_offset)
					: substr($line, $name_offset);
			}

			$station =~ s{^\s+}{};
			$station =~ s{\s+$}{};
			$station =~ s{\s+}{ }g;

			# "!" stands in for a space inside a name: the input loop swaps it
			# in for some spaces before buffering a line.
			$name =~ s{!}{ }g;
			$name =~ s{^\s+}{};
			$name =~ s{\s+$}{};
			$name =~ s{\s+}{ }g;

			if (length($station) == 0) {
				next;
			}

			# Both fields end up inside single-quoted Perl string literals, so
			# backslashes and single quotes must be escaped in each of them.
			$station =~ s{([\\'])}{\\$1}g;
			$name =~ s{([\\'])}{\\$1}g;

			printf("\t['%s','%s'],\n", encode('UTF-8', $station), encode('UTF-8', $name));
		}
	}

	return;
}

while (my $line = <STDIN>) {
	chomp $line;
	$line = decode('UTF-8', $line);

	if ($line =~ $re_line) {
		my ($station, $name) = @+{qw{acronym name}};
		$name =~ s{'}{\\'}g;
	if (length($line) == 0 and @buf) {
		process_block();
		@buf = ();
	}

	if ($line !~ m{ ^ [A-Z]{2} }x and $line !~ m{ \s [A-Z]{2,5} \s }x) {
		next;
	}

	$line =~ s{RB-Gr km}{RB-Gr!km}g;
	$line =~ s{RB-Gr!km\s++}{RB-Gr!km!}g;
	$line =~ s{Bad }{Bad!}g;

		printf("\t['%s','%s'],\n", $station, $name);
	push(@buf, $line);
}
if (@buf) {
	process_block();
}

say <<'EOF';
+1 −1
Original line number Diff line number Diff line
#!/bin/sh

curl -s http://fahrweg.dbnetze.com/file/fahrweg-de/2394144/vHBDX5OndmGwv-JTA9EzuNArX1E/2361656/data/betriebsstellen.pdf \
| pdftotext -raw - - | perl scripts/acronyms.pl \
| pdftotext -layout - - | perl scripts/acronyms.pl \
> lib/Travel/Status/DE/IRIS/Stations.pm