Fix Wikipedia API with User-Agent (#822)
* refactor(serializers): remove unused gpxpy and geojson imports * fix(generate_description): improve error handling and response validation for Wikipedia API calls * Potential fix for code scanning alert no. 42: Information exposure through an exception Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * fix(generate_description): improve error logging for Wikipedia API data fetch failures * chore(deps): bump devalue (#823) Bumps the npm_and_yarn group with 1 update in the /frontend directory: [devalue](https://github.com/sveltejs/devalue). Updates `devalue` from 5.1.1 to 5.3.2 - [Release notes](https://github.com/sveltejs/devalue/releases) - [Changelog](https://github.com/sveltejs/devalue/blob/main/CHANGELOG.md) - [Commits](https://github.com/sveltejs/devalue/compare/v5.1.1...v5.3.2) --- updated-dependencies: - dependency-name: devalue dependency-version: 5.3.2 dependency-type: indirect dependency-group: npm_and_yarn ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Sean Morley <98704938+seanmorley15@users.noreply.github.com> * Refactor help documentation link in settings page - Updated the condition to display the help documentation link based on the `wandererEnabled` flag. - Removed the conditional rendering for staff users and Strava integration status. - Changed the documentation link to point to the Immich integration documentation. 
* fix(locations): update include_collections parameter handling for default behavior * Update backend/server/adventures/views/generate_description_view.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -3,42 +3,137 @@ from rest_framework.decorators import action
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
import requests
|
||||
from django.conf import settings
|
||||
import urllib.parse
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class GenerateDescription(viewsets.ViewSet):
    """Fetch a plain-text description or lead image for a named place from
    the English Wikipedia API.

    Exposes two GET actions, ``desc`` and ``img``, both of which take a
    ``name`` query parameter, resolve it to an article title via OpenSearch,
    and query the MediaWiki Action API for that article.
    """

    permission_classes = [IsAuthenticated]

    # User-Agent header required by the Wikipedia API; falls back to
    # "unknown" when the release-version setting is not configured.
    HEADERS = {
        'User-Agent': f'AdventureLog/{getattr(settings, "ADVENTURELOG_RELEASE_VERSION", "unknown")}'
    }

    def _fetch_page(self, url):
        """GET *url* and return ``(page_data, None)`` for the first page in
        the API response, or ``(None, error_response)`` on any failure.

        Centralizes the request/validation logic shared by ``desc`` and
        ``img`` so error handling stays consistent between them.
        """
        try:
            response = requests.get(url, headers=self.HEADERS, timeout=10)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException:
            # Log the full traceback server-side but return a generic message
            # to avoid leaking request details to the client.
            logger.exception("Failed to fetch data from Wikipedia")
            return None, Response({"error": "Failed to fetch data from Wikipedia."}, status=500)
        except ValueError:  # JSON decode error
            return None, Response({"error": "Invalid response from Wikipedia API"}, status=500)

        pages = data.get("query", {}).get("pages", {})
        if not pages:
            return None, Response({"error": "No page data found"}, status=404)

        page_id = next(iter(pages))
        # A page_id of "-1" means the article does not exist.
        if page_id == "-1":
            return None, Response({"error": "Wikipedia page not found"}, status=404)

        return pages[page_id], None

    @action(detail=False, methods=['get'])
    def desc(self, request):
        """Return the intro extract of the Wikipedia article matching the
        ``name`` query parameter, or a JSON error with 400/404/500."""
        name = self.request.query_params.get('name', '')
        if not name:
            return Response({"error": "Name parameter is required"}, status=400)

        # Properly URL decode the name
        name = urllib.parse.unquote(name)
        search_term = self.get_search_term(name)

        if not search_term:
            return Response({"error": "No matching Wikipedia article found"}, status=404)

        # Properly URL encode the search term for the API
        encoded_term = urllib.parse.quote(search_term)
        url = f'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=extracts&exintro&explaintext&format=json&titles={encoded_term}'

        page_data, error = self._fetch_page(url)
        if error is not None:
            return error

        if not page_data.get('extract'):
            return Response({"error": "No description found"}, status=404)

        return Response(page_data)

    @action(detail=False, methods=['get'])
    def img(self, request):
        """Return the original lead image descriptor of the Wikipedia article
        matching the ``name`` query parameter, or a JSON error."""
        name = self.request.query_params.get('name', '')
        if not name:
            return Response({"error": "Name parameter is required"}, status=400)

        # Properly URL decode the name
        name = urllib.parse.unquote(name)
        search_term = self.get_search_term(name)

        if not search_term:
            return Response({"error": "No matching Wikipedia article found"}, status=404)

        # Properly URL encode the search term for the API
        encoded_term = urllib.parse.quote(search_term)
        url = f'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=pageimages&format=json&piprop=original&titles={encoded_term}'

        page_data, error = self._fetch_page(url)
        if error is not None:
            return error

        original_image = page_data.get('original')
        if not original_image:
            return Response({"error": "No image found"}, status=404)

        return Response(original_image)

    def get_search_term(self, term):
        """Resolve *term* to the best-matching Wikipedia article title.

        Returns the first title from the OpenSearch API, ``None`` when the
        term is empty or nothing matches, or *term* itself as a best-effort
        fallback when the search request or its JSON payload fails.
        """
        if not term:
            return None

        # Properly URL encode the search term
        encoded_term = urllib.parse.quote(term)
        url = f'https://en.wikipedia.org/w/api.php?action=opensearch&search={encoded_term}&limit=10&namespace=0&format=json'

        try:
            response = requests.get(url, headers=self.HEADERS, timeout=10)
            response.raise_for_status()

            # Check if response is empty
            if not response.text.strip():
                return None

            data = response.json()

            # OpenSearch API returns an array with 4 elements:
            # [search_term, [titles], [descriptions], [urls]]
            if len(data) >= 2 and data[1] and len(data[1]) > 0:
                return data[1][0]  # Return the first title match

            return None

        except requests.exceptions.RequestException:
            # If search fails, return the original term as fallback
            return term
        except ValueError:  # JSON decode error
            # If JSON parsing fails, return the original term as fallback
            return term
Reference in New Issue
Block a user