Monday, June 1, 2009

Using PhP to parse some addresses

I wrote a PhP script to parse addresses that were coming into me in a format that was less than manageable. The format below was what the addresses looked like:

Bay DR W, 364

The Php took that and parsed that into street name, street number, street indicator and direction. It created a new table and put the parsed values into that table, which I then exported and used to match up with the Kitsap County SITUS database to verify the addresses that I was getting out (the Kitsap County SITUS is similarly parsed to the new table that I created).



The script I wrote is below: The basic logic is this: take everything that is to the right of the comma and call it the street number. Then, take what is left, and see if any of it matches either a road indicator or a direction. If it does - put it in the proper field, and then take everything that is left over and call that the street name. It worked for 98% of the addresses I was using - and the atypical ones had been entered wrong, and were subsequently corrected.



//php code written by Dominic Ebacher, May 2009
// Opens a connection to a MySQL server.
$username="name";
$password="pw";
$database="db_POAddress";
$connection = mysql_connect ('localhost', $username, $password);
if (!$connection) {
die('Not connected : ' . mysql_error());
}
// Sets the active MySQL database. You can't select a database unless the connection is open, can you?
$db_selected = mysql_select_db($database, $connection);
if (!$db_selected) {
die('Can\'t use db : ' . mysql_error());
}

$query0 ="drop TABLE tbl_Addresses_Parsed";
$result = mysql_query($query0);

$query1 ="CREATE TABLE tbl_Addresses_Parsed
(
id MEDIUMINT(5) NOT NULL AUTO_INCREMENT,
ST_NO integer,
ST_NAME text,
DIRECTION text,
INDICATOR text,
TYPE text,
SERVICE text,
NO_UNITS varchar(5),
NOTES text,
PRIMARY KEY (id)
)";
$result = mysql_query($query1);

$query = "SELECT * FROM tbl_POAddresses";
$result = mysql_query($query);
if (!$result){ die('Invalid query: ' . mysql_error()); }
while ($row = @mysql_fetch_array($result)){

$FullAddress= $row['Address'];
$AddressArray=explode(",", $FullAddress);
$number =$AddressArray[1];
$street =$AddressArray[0];
$type=$row['Type'];
$service=$row['Service'];
$unit=$row['NO_Units'];
$notes=$row['Notes'];
$StreetArray=explode(" ", $street);

$numPts = sizeof($StreetArray);


for ($p = 0; $p <= $numPts - 1; $p++) {


if ($StreetArray[$p]=='ST') {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]=='WAY') {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'PL' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'DR' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'AVE' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'CT' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'LN' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'RD' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'PKWY' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'BLVD' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'TERRACE' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'HWY' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'RT' ) {$indicator=$StreetArray[$p]; unset($StreetArray[$p]);}


if ($StreetArray[$p]== 'N' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'S' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'E' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'W' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'NW' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'NE' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'SW' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'SE' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'n' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 's' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'e' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'w' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'nw' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'ne' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'sw' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}
if ( $StreetArray[$p]== 'se' ) {$directional=$StreetArray[$p]; unset($StreetArray[$p]);}

}
$streetname=implode(" ",$StreetArray);
$o=" ";
//echo "st no:";
//echo $number;
//echo "st Direction:";
//echo $directional;
//echo "St Name:";
//echo $streetname;
//echo "Indicator:";
//echo $indicator;
$foo=$number.$o.$directional.$o.$streetname.$o.$indicator;
//echo $foo;

$numbera[]=$number;
$streetnamea[]=$streetname;
$directionala[]=$directional;
$indicatora[]=$indicator;
$servicea[]=$service;
$notesa[]=$notes;
$typea[]=$type;
$unita[]=$unit;
unset($directional);
unset($type);
unset($unit);
unset($notes);
unset($service);
unset($indicator);
unset($number);
unset($streetname);
//$query2 ="Insert into tbl_Addresses_Parsed (ST_NO,ST_NAME,DIRECTION,INDICATOR) VALUES ('$number','$streetname','$directional','$indicator')";
//$result = mysql_query($query2);



echo "
";

}
echo "end";
$numEnt=sizeof($streetnamea);

for ($p = 0; $p <= $numEnt - 1; $p++) {
$query ="Insert into tbl_Addresses_Parsed (ST_NO,ST_NAME,DIRECTION,INDICATOR,TYPE,SERVICE,NO_UNITS,NOTES) VALUES ('$numbera[$p]','$streetnamea[$p]','$directionala[$p]','$indicatora[$p]','$typea[$p]','$servicea[$p]','$unita[$p]','$notesa[$p]')";
$result = mysql_query($query);
}
?>

No comments:

Post a Comment