OSM: support way results for more accurate search.

This commit is contained in:
projectmoon 2024-09-17 22:30:27 +00:00
parent b832c3c387
commit fc00abe030
2 changed files with 98 additions and 65 deletions

View File

@ -1,5 +1,12 @@
# OpenStreetMap Tool # OpenStreetMap Tool
**0.5.0:**
- Support Way results. This makes searching much more accurate and
useful. Many map features are marked as "ways" (shapes on the map)
rather than specific points.
- Drop support for "secondary results," and instead return Ways that
have enough information to be useful.
**0.4.0:** **0.4.0:**
- Complete rewrite of search result handling to prevent incorrect OSM - Complete rewrite of search result handling to prevent incorrect OSM
map links being generated, and bad info given. map links being generated, and bad info given.

156
osm.py
View File

@ -2,7 +2,7 @@
title: OpenStreetMap Tool title: OpenStreetMap Tool
author: projectmoon author: projectmoon
author_url: https://git.agnos.is/projectmoon/open-webui-filters author_url: https://git.agnos.is/projectmoon/open-webui-filters
version: 0.4.0 version: 0.5.0
license: AGPL-3.0+ license: AGPL-3.0+
required_open_webui_version: 0.3.21 required_open_webui_version: 0.3.21
""" """
@ -101,19 +101,35 @@ def way_has_info(way):
""" """
return len(way['tags']) > 1 and any('name' in tag for tag in way['tags']) return len(way['tags']) > 1 and any('name' in tag for tag in way['tags'])
def strip_nodes_from_way(way): def process_way_result(way) -> Optional[dict]:
"""
Post-process an OSM Way dict to remove the geometry and node
info, and calculate a single GPS coordinate from its bounding
box.
"""
if 'nodes' in way: if 'nodes' in way:
del way['nodes'] del way['nodes']
return way
if 'geometry' in way:
del way['geometry']
if 'bounds' in way:
way_center = get_bounding_box_center(way['bounds'])
way['lat'] = way_center['lat']
way['lon'] = way_center['lon']
del way['bounds']
return way
return None
def get_bounding_box_center(bbox): def get_bounding_box_center(bbox):
def convert(bbox, key): def convert(bbox, key):
return bbox[key] if isinstance(bbox[key], float) else float(bbox[key]) return bbox[key] if isinstance(bbox[key], float) else float(bbox[key])
min_lat = convert(bbox, 'min_lat') min_lat = convert(bbox, 'minlat')
min_lon = convert(bbox, 'min_lon') min_lon = convert(bbox, 'minlon')
max_lat = convert(bbox, 'max_lat') max_lat = convert(bbox, 'maxlat')
max_lon = convert(bbox, 'max_lon') max_lon = convert(bbox, 'maxlon')
return { return {
'lon': (min_lon + max_lon) / 2, 'lon': (min_lon + max_lon) / 2,
@ -147,38 +163,56 @@ def sort_by_closeness(origin, points):
point['distance'] = distance point['distance'] = distance
return [point for point, distance in points_with_distance] return [point for point, distance in points_with_distance]
def get_or_unknown(tags: dict, *keys: str) -> str: def get_or_none(tags: dict, *keys: str) -> Optional[str]:
""" """
Try to extract a value from a dict by trying keys in order, or Try to extract a value from a dict by trying keys in order, or
return unknown if none of the keys were found. return None if none of the keys were found.
""" """
for key in keys: for key in keys:
if key in tags: if key in tags:
return tags[key] return tags[key]
return "unknown" return None
def parse_thing_address(tags: dict) -> str: def all_are_none(*args) -> bool:
#'addr:city': 'Haarlem', 'addr:housenumber': '8', 'addr:postcode': '2012DG', 'addr:street' for arg in args:
house_number = get_or_unknown(tags, "addr:housenumber", "addr:house_number") if arg is not None:
street = get_or_unknown(tags, "addr:street") return False
city = get_or_unknown(tags, "addr:city")
state = get_or_unknown(tags, "addr:state", "addr:province")
postal_code = get_or_unknown(tags,
"addr:postcode", "addr:post_code", "addr:postal_code",
"addr:zipcode", "addr:zip_code")
# Bit of a hack to make sure spacing of state is correct, whether return True
# or not it's present.
state = f" {state} " if state != "unknown" else " "
return f"{street} {house_number}, {city}{state}{postal_code}" def parse_thing_address(tags: dict) -> Optional[str]:
house_number = get_or_none(tags, "addr:housenumber", "addr:house_number")
street = get_or_none(tags, "addr:street")
city = get_or_none(tags, "addr:city")
state = get_or_none(tags, "addr:state", "addr:province")
postal_code = get_or_none(
tags,
"addr:postcode", "addr:post_code", "addr:postal_code",
"addr:zipcode", "addr:zip_code"
)
# if all are none, that means we don't know the address at all.
if all_are_none(house_number, street, city, state, postal_code):
return None
# Handle missing values to create complete-ish addresses, even if
# we have missing data. We will get either a partly complete
# address, or None if all the values are missing.
line1 = filter(None, [street, house_number])
line2 = filter(None, [city, state, postal_code])
line1 = " ".join(line1).strip()
line2 = " ".join(line2).strip()
full_address = filter(None, [line1, line2])
full_address = ", ".join(full_address).strip()
return full_address if len(full_address) > 0 else None
def parse_and_validate_thing(thing: dict) -> Optional[dict]: def parse_and_validate_thing(thing: dict) -> Optional[dict]:
""" """
Parse an OSM result and make it more friendly to work with. Parse an OSM result (node or post-processed way) and make it
Helps remove ambiguity of the LLM interpreting the raw JSON data. more friendly to work with. Helps remove ambiguity of the LLM
If there is not enough data, discard the result. interpreting the raw JSON data. If there is not enough data,
discard the result.
""" """
tags: dict = thing['tags'] if 'tags' in thing else {} tags: dict = thing['tags'] if 'tags' in thing else {}
@ -209,7 +243,10 @@ def parse_and_validate_thing(thing: dict) -> Optional[dict]:
def create_osm_link(lat, lon): def create_osm_link(lat, lon):
return EXAMPLE_OSM_LINK.replace("<lat>", str(lat)).replace("<lon>", str(lon)) return EXAMPLE_OSM_LINK.replace("<lat>", str(lat)).replace("<lon>", str(lon))
def convert_and_validate_results(original_location: str, things_nearby: List[dict]) -> Optional[str]: def convert_and_validate_results(
original_location: str,
things_nearby: List[dict]
) -> Optional[str]:
""" """
Converts the things_nearby JSON into Markdown-ish results to Converts the things_nearby JSON into Markdown-ish results to
(hopefully) improve model understanding of the results. Intended (hopefully) improve model understanding of the results. Intended
@ -268,10 +305,9 @@ class OsmSearcher:
"""Let user valve for instruction mode override the global setting.""" """Let user valve for instruction mode override the global setting."""
print(str(self.user_valves)) print(str(self.user_valves))
if self.user_valves: if self.user_valves:
print(f"Using user valve setting: {self.user_valves.instruction_oriented_interpretation}")
return self.user_valves.instruction_oriented_interpretation return self.user_valves.instruction_oriented_interpretation
else: else:
self.valves.instruction_oriented_interpretation return self.valves.instruction_oriented_interpretation
def get_result_instructions(self, tag_type_str: str) -> str: def get_result_instructions(self, tag_type_str: str) -> str:
if self.use_detailed_interpretation_mode(): if self.use_detailed_interpretation_mode():
@ -332,6 +368,11 @@ class OsmSearcher:
def overpass_search( def overpass_search(
self, place, tags, bbox, limit=5, radius=4000 self, place, tags, bbox, limit=5, radius=4000
) -> (List[dict], List[dict]): ) -> (List[dict], List[dict]):
"""
Return a list relevant of OSM nodes and ways. Some
post-processing is done on ways in order to add coordinates to
them.
"""
headers = self.create_headers() headers = self.create_headers()
if not headers: if not headers:
raise ValueError("Headers not set") raise ValueError("Headers not set")
@ -354,23 +395,25 @@ class OsmSearcher:
if len(search) > 0: if len(search) > 0:
search += ";" search += ";"
# "out geom;" is needed to get bounding box info of ways,
# so we can calculate the coordinates.
query = f""" query = f"""
[out:json]; [out:json];
( (
{search} {search}
); );
out qt; out geom;
""" """
print(query) print(query)
data = { "data": query } data = { "data": query }
response = requests.get(url, params=data, headers=headers) response = requests.get(url, params=data, headers=headers)
if response.status_code == 200: if response.status_code == 200:
# nodes are prioritized because they have exact GPS # nodes have have exact GPS coordinates. we also include
# coordinates. we also include useful way entries (without # useful way entries, post-processed to remove extra data
# node list) as secondary results, because there are often # and add a centered calculation of their GPS coords. any
# useful results that don't have a node (e.g. building or # way that doesn't have enough info for us to use is
# whole area marked for the tag type). # dropped.
results = response.json() results = response.json()
results = results['elements'] if 'elements' in results else [] results = results['elements'] if 'elements' in results else []
nodes = [] nodes = []
@ -382,7 +425,7 @@ class OsmSearcher:
if res['type'] == 'node': if res['type'] == 'node':
nodes.append(res) nodes.append(res)
elif res['type'] == 'way' and way_has_info(res): elif res['type'] == 'way' and way_has_info(res):
ways.append(strip_nodes_from_way(res)) ways.append(process_way_result(res))
return nodes, ways return nodes, ways
else: else:
@ -400,10 +443,10 @@ class OsmSearcher:
if nominatim_result: if nominatim_result:
nominatim_result = nominatim_result[0] nominatim_result = nominatim_result[0]
bbox = { bbox = {
'min_lat': nominatim_result['boundingbox'][0], 'minlat': nominatim_result['boundingbox'][0],
'max_lat': nominatim_result['boundingbox'][1], 'maxlat': nominatim_result['boundingbox'][1],
'min_lon': nominatim_result['boundingbox'][2], 'minlon': nominatim_result['boundingbox'][2],
'max_lon': nominatim_result['boundingbox'][3] 'maxlon': nominatim_result['boundingbox'][3]
} }
nodes, ways = self.overpass_search(place, tags, bbox, limit, radius) nodes, ways = self.overpass_search(place, tags, bbox, limit, radius)
@ -411,48 +454,31 @@ class OsmSearcher:
# use results from overpass, but if they do not exist, # use results from overpass, but if they do not exist,
# fall back to the nominatim result. this may or may # fall back to the nominatim result. this may or may
# not be a good idea. # not be a good idea.
things_nearby = (nodes things_nearby = (nodes + ways
if len(nodes) > 0 if len(nodes) > 0 or len(ways) > 0
else OsmSearcher.fallback(nominatim_result)) else OsmSearcher.fallback(nominatim_result))
origin = get_bounding_box_center(bbox) origin = get_bounding_box_center(bbox)
things_nearby = sort_by_closeness(origin, things_nearby) things_nearby = sort_by_closeness(origin, things_nearby)
things_nearby = things_nearby[:limit] things_nearby = things_nearby[:limit] # drop down to requested limit
primary_results = convert_and_validate_results(place, things_nearby) search_results = convert_and_validate_results(place, things_nearby)
other_results = ways[:(limit+5)]
if not things_nearby or len(things_nearby) == 0: if not things_nearby or len(things_nearby) == 0:
return NO_RESULTS return NO_RESULTS
tag_type_str = ", ".join(tags) tag_type_str = ", ".join(tags)
if len(other_results) > 0:
extra_info = (
"\n\n----------\n\n"
f"Additionally, here are some other results that might be useful. "
"The exact distance from the requested location is not known. "
"The seconary results are below."
"\n\n----------\n\n"
f"{str(other_results)}")
else:
extra_info = ""
# Only print the full result instructions if we # Only print the full result instructions if we
# actually have something. # actually have something.
if primary_results: if search_results:
result_instructions = self.get_result_instructions(tag_type_str) result_instructions = self.get_result_instructions(tag_type_str)
else: else:
if len(other_results) > 0: result_instructions = ("No results found at all. "
result_instructions = ("No primary results found, but there are secondary results." "Tell the user there are no results.")
"These results have less details, but still have useful "
"information.")
else:
result_instructions = "No results found at all. Tell the user there are no results."
resp = ( resp = (
f"{result_instructions}\n\n" f"{result_instructions}\n\n"
f"{primary_results}" f"{search_results}"
f"{extra_info}"
) )
print(resp) print(resp)