Improve Google URL parsing

This commit is contained in:
Austen Adler 2023-04-27 21:22:00 -04:00
parent 24df98878c
commit b12d9137b5
2 changed files with 51 additions and 9 deletions

View File

@ -45,7 +45,7 @@ impl LatLon {
/// Parse a latitude and longitude as two floating point numbers separated by a comma and/or whitespace only (no bearings) /// Parse a latitude and longitude as two floating point numbers separated by a comma and/or whitespace only (no bearings)
/// Parse only the entire string /// Parse only the entire string
pub fn parse_full(i: &str) -> IResult<&str, Self> { pub fn parse(i: &str) -> IResult<&str, Self> {
map_res( map_res(
tuple(( tuple((
space0, space0,
@ -53,10 +53,8 @@ impl LatLon {
optional_separator(','), optional_separator(','),
space0, space0,
parse_f64, parse_f64,
space0,
eof,
)), )),
|(_, lat, _, _, lon, _, _)| Self::new(lat, lon), |(_, lat, _, _, lon)| Self::new(lat, lon),
)(i) )(i)
} }
} }

View File

@ -5,6 +5,7 @@ use nom::{
character::complete::{self, digit1}, character::complete::{self, digit1},
combinator::{eof, fail, map, map_res, rest}, combinator::{eof, fail, map, map_res, rest},
error::context, error::context,
multi::many0,
sequence::{pair, tuple}, sequence::{pair, tuple},
IResult, IResult,
}; };
@ -54,11 +55,28 @@ impl CoordinateUrls {
fn parse_google_maps(i: &str) -> IResult<&str, LatLon> { fn parse_google_maps(i: &str) -> IResult<&str, LatLon> {
let (_, url) = Self::parse_url_full(i)?; let (_, url) = Self::parse_url_full(i)?;
// https://www.google.com/maps/place/69%C2%B047'21.8%22N+108%C2%B022'45.4%22W/@69.7869906,-108.3727563,15.31z/data=!4m4!3m3!8m2!3d69.7893868!4d-108.3792642
for segment in url.path_segments().into_iter().flatten() { for segment in url.path_segments().into_iter().flatten() {
// https://www.google.com/maps/place/69%C2%B047'21.8%22N+108%C2%B022'45.4%22W/@69.7869906,-108.3727563,15.31z/data=!4m4!3m3!8m2!3d69.7893868!4d-108.3792642
// https://www.google.com/maps/@35.7224075,-78.4170755,8z
if let Ok((_str, (_, ret))) = pair(complete::char('@'), LatLon::parse)(segment) { if let Ok((_str, (_, ret))) = pair(complete::char('@'), LatLon::parse)(segment) {
return Ok(("", ret)); return Ok(("", ret));
} }
// https://www.google.com/maps/search/35.416336,+-79.751914
if let Ok((_str, ret)) = map_res(
tuple((
// Whitespace can be encoded as '+'
many0(complete::char('+')),
parse_f64,
complete::char(','),
many0(complete::char('+')),
parse_f64,
)),
|(_, lat, _, _, lon)| LatLon::new(lat, lon),
)(segment)
{
return Ok(("", ret));
}
} }
for (key, value) in url.query_pairs() { for (key, value) in url.query_pairs() {
@ -248,9 +266,35 @@ mod tests {
#[test] #[test]
fn test_general() { fn test_general() {
assert!(dbg!(CoordinateUrls::parse( macro_rules! p {
"https://www.google.com/maps/search/?query=27,23" ($url:expr, $lat:expr, $lon:expr) => {
)) assert_eq!(
.is_ok()); CoordinateUrls::parse($url).unwrap().1.latlon,
LatLon::new($lat, $lon).unwrap()
);
};
}
p!("https://www.google.com/maps/place/69%C2%B047'21.8%22N+108%C2%B022'45.4%22W/@69.7869906,-108.3727563,15.31z/data=!4m4!3m3!8m2!3d69.7893868!4d-108.3792642", 69.7869906,-108.3727563);
p!(
"https://www.google.com/maps/search/?query=27,23",
27.0,
23.0
);
p!(
"https://www.google.com/maps/search/35.416336,+-79.751914",
35.416336,
-79.751914
);
p!("https://www.google.com/maps/place/69%C2%B047'21.8%22N+108%C2%B022'45.4%22W/@69.7869906,-108.3727563,15.31z/data=!4m4!3m3!8m2!3d69.7893868!4d-108.3792642", 69.7869906,-108.3727563);
p!(
"https://www.google.com/maps/@35.7224075,-78.4170755,8z",
35.7224075,
-78.4170755
);
} }
} }