import re
import csv
import pandas as pd# DOJ State Crime Estimates
# Flatten the per-state sections of the crime CSV into one row per state and
# year, then load the rows into a DataFrame.
csv_file = 'CrimeStatebyState.csv'
# Name of the state whose section is currently being read ('' = none yet).
state = ''
# Store the output (list of dictionaries, for importing into pandas)
data = []
# newline='' lets the csv module do its own line-ending handling, as the
# csv documentation requires for files passed to csv.reader.
with open(csv_file, newline='') as f:
    reader = csv.reader(f)
    for line in reader:
        # Skip blank lines.
        if not line:
            continue
        if line[0][:9] == 'Estimated':
            # Section header, e.g. "Estimated crime in Alabama".
            matches = re.search('Estimated crime in (.*)$', line[0])
            # A cell that starts with "Estimated" but is worded differently
            # names no state: clear the current state so the rows below it
            # are ignored, rather than crashing on matches.group() or
            # attributing them to the previous state.
            state = matches.group(1) if matches else ''
        elif state and state != 'United States-Total' and line[0] != 'Year':
            # Rows too short to hold year, population and violent-crime
            # total cannot be data rows; skip them instead of raising
            # IndexError.
            if len(line) < 3:
                continue
            state_dict = {
                'State': state,
                'Year': line[0],
                'Population': line[1],
                'Violent crime total': line[2],
            }
            data.append(state_dict)
df = pd.DataFrame(data)
print(df)
# Output:
#      State    Year Population Violent crime total
# 0    Alabama  1985 4021000    18398
# 1    Alabama  1986 4053000    22616
# 2    Alabama  1987 4083000    22833
# 3    Alabama  1988 4127000    23052
# 4    Alabama  1989 4118000    24329
# ...  ...      ...  ...        ...
# 1525 Wyoming  2010 564554     1117
# 1526 Wyoming  2011 567356     1245
# 1527 Wyoming  2012 576626     1161
# 1528 Wyoming  2013 583223     1212
# 1529 Wyoming  2014 584153     1142
# [1530 rows x 4 columns]
# Part B
import re
import csv
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt#read txt file
# Load the world cities text file into memory as a list of lines
# (line endings are kept on each entry).
cities_path = "C:/Users/ashle/Downloads/lab11/worldcities.txt"
with open(cities_path, mode="r", encoding="utf-8") as handle:
    lines = list(handle)
# Matches one line of the cities file: a place name, then latitude and
# longitude written as degrees°minutes' plus a hemisphere letter, with a
# tab between the three fields.
pattern = re.compile(
    "".join(
        (
            r"^(.*?)\t",  # City/Place name
            r"(\d+)°(\d+)'([NS])\t",  # Latitude DMS
            r"(\d+)°(\d+)'([EW])",  # Longitude DMS
        )
    )
)
def dms_to_dd(deg, minutes, hemi):
    """Convert degrees and minutes plus a hemisphere letter to decimal degrees.

    Args:
        deg: Whole degrees; anything ``float()`` accepts (number or string).
        minutes: Arc-minutes; anything ``float()`` accepts.
        hemi: Hemisphere letter. "S" and "W" give a negative result; any
            other value leaves the result positive.

    Returns:
        The coordinate as a signed float in decimal degrees.
    """
    magnitude = float(deg) + float(minutes) / 60
    # Southern and western hemispheres are negative in decimal degrees.
    sign = -1 if hemi in ("S", "W") else 1
    return sign * magnitude
# Collect one [city, latitude, longitude] row (decimal degrees) for every
# line that matches the coordinate pattern; other lines are ignored.
records = []
for raw_line in lines:
    found = pattern.search(raw_line)
    if found is None:
        continue
    name, lat_deg, lat_min, lat_dir, lon_deg, lon_min, lon_dir = found.groups()
    records.append(
        [
            name.strip(),
            dms_to_dd(lat_deg, lat_min, lat_dir),
            dms_to_dd(lon_deg, lon_min, lon_dir),
        ]
    )
# Tabulate the parsed cities.
df = pd.DataFrame(records, columns=['city', 'latitude', 'longitude'])
# Build point geometry (x = longitude, y = latitude) and attach it to the
# table as a GeoDataFrame tagged with the EPSG:4326 coordinate system.
city_points = gpd.points_from_xy(df["longitude"], df["latitude"])
gdf = gpd.GeoDataFrame(df, geometry=city_points, crs="EPSG:4326")
# Map the cities on top of a world basemap.
# The Natural Earth 1:110m country polygons are downloaded at run time.
import requests
import io

url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
# requests has no default timeout, so bound the call to keep a stalled
# connection from hanging the script forever.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of handing an error page to
# gpd.read_file as if it were the zip archive.
r.raise_for_status()
world = gpd.read_file(io.BytesIO(r.content))

# White countries with black outlines as the base layer, cities in red on top.
ax = world.plot(figsize=(15, 10), edgecolor="black", color="white")
gdf.plot(ax=ax, color="red", markersize=40)
ax.set_title("World Cities Converted from DMS to Decimal Degrees")
# Output:
# C:\Users\ashle\anaconda3\Lib\site-packages\pyogrio\core.py:35: RuntimeWarning: Could not detect GDAL data files. Set GDAL_DATA environment variable to the correct path.
#   _init_gdal_data()
# Text(0.5, 1.0, 'World Cities Converted from DMS to Decimal Degrees')
