#!/usr/bin/env python3
"""
Cryptocurrency News API Client - REAL DATA ONLY
Fetches real news from NewsAPI, CryptoPanic, and RSS feeds
NO MOCK DATA - All news from real sources
"""

import httpx
import logging
import os
import feedparser
from typing import Dict, Any, List, Optional
from datetime import datetime, timezone
from fastapi import HTTPException

logger = logging.getLogger(__name__)


class CryptoNewsClient:
    """
    Real Cryptocurrency News API Client
    Aggregates news from multiple real sources
    """
    
    def __init__(self):
        # NewsAPI
        self.newsapi_key = os.getenv("NEWSAPI_KEY", "")
        self.newsapi_url = "https://newsapi.org/v2"
        
        # CryptoPanic
        self.cryptopanic_token = os.getenv("CRYPTOPANIC_TOKEN", "")
        self.cryptopanic_url = "https://cryptopanic.com/api/v1"
        
        # RSS Feeds - Updated URLs for reliability
        self.rss_feeds = {
            "coindesk": "https://www.coindesk.com/arc/outboundfeeds/rss/",
            "cointelegraph": "https://cointelegraph.com/rss",
            "decrypt": "https://decrypt.co/feed",
            "bitcoinist": "https://bitcoinist.com/feed/",
            "cryptoslate": "https://cryptoslate.com/feed/"
        }
        
        self.timeout = 15.0
    
    async def get_latest_news(self, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Get REAL latest cryptocurrency news
        Tries multiple sources with fallback
        
        Returns:
            List of real news articles
        """
        articles = []
        
        # Try NewsAPI first (if API key available)
        if self.newsapi_key:
            try:
                newsapi_articles = await self._fetch_from_newsapi(limit=limit)
                articles.extend(newsapi_articles)
                
                if len(articles) >= limit:
                    logger.info(f"✅ NewsAPI: Fetched {len(articles)} real articles")
                    return articles[:limit]
            except Exception as e:
                logger.warning(f"⚠️ NewsAPI failed: {e}")
        
        # Try CryptoPanic (if token available)
        if self.cryptopanic_token and len(articles) < limit:
            try:
                cryptopanic_articles = await self._fetch_from_cryptopanic(
                    limit=limit - len(articles)
                )
                articles.extend(cryptopanic_articles)
                
                if len(articles) >= limit:
                    logger.info(
                        f"✅ CryptoPanic: Fetched {len(articles)} real articles"
                    )
                    return articles[:limit]
            except Exception as e:
                logger.warning(f"⚠️ CryptoPanic failed: {e}")
        
        # Fallback to RSS feeds
        if len(articles) < limit:
            try:
                rss_articles = await self._fetch_from_rss_feeds(
                    limit=limit - len(articles)
                )
                articles.extend(rss_articles)
                
                logger.info(f"✅ RSS Feeds: Fetched {len(articles)} real articles")
            except Exception as e:
                logger.warning(f"⚠️ RSS feeds failed: {e}")
        
        # If still no articles, raise error
        if len(articles) == 0:
            raise HTTPException(
                status_code=503,
                detail="All news sources temporarily unavailable"
            )
        
        logger.info(
            f"✅ Successfully fetched {len(articles)} real news articles "
            f"from multiple sources"
        )
        return articles[:limit]
    
    async def _fetch_from_newsapi(self, limit: int = 20) -> List[Dict[str, Any]]:
        """Fetch REAL news from NewsAPI"""
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(
                    f"{self.newsapi_url}/everything",
                    params={
                        "q": "cryptocurrency OR bitcoin OR ethereum OR crypto",
                        "apiKey": self.newsapi_key,
                        "language": "en",
                        "sortBy": "publishedAt",
                        "pageSize": min(limit, 100)
                    }
                )
                response.raise_for_status()
                data = response.json()
                
                articles = []
                for article in data.get("articles", []):
                    # Parse timestamp
                    published_at = article.get("publishedAt", "")
                    try:
                        dt = datetime.fromisoformat(
                            published_at.replace("Z", "+00:00")
                        )
                        timestamp = int(dt.timestamp() * 1000)
                    except (ValueError, AttributeError):
                        timestamp = int(datetime.now(timezone.utc).timestamp() * 1000)
                    
                    articles.append({
                        "title": article.get("title", ""),
                        "description": article.get("description", ""),
                        "url": article.get("url", ""),
                        "source": article.get("source", {}).get("name", "NewsAPI"),
                        "timestamp": timestamp,
                        "author": article.get("author"),
                        "imageUrl": article.get("urlToImage")
                    })
                
                logger.info(f"✅ NewsAPI: Fetched {len(articles)} articles")
                return articles
        
        except Exception as e:
            logger.error(f"❌ NewsAPI failed: {e}")
            raise
    
    async def _fetch_from_cryptopanic(self, limit: int = 20) -> List[Dict[str, Any]]:
        """Fetch REAL news from CryptoPanic"""
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(
                    f"{self.cryptopanic_url}/posts/",
                    params={
                        "auth_token": self.cryptopanic_token,
                        "public": "true",
                        "filter": "hot"
                    }
                )
                response.raise_for_status()
                data = response.json()
                
                articles = []
                for post in data.get("results", [])[:limit]:
                    # Parse timestamp
                    created_at = post.get("created_at", "")
                    try:
                        dt = datetime.fromisoformat(
                            created_at.replace("Z", "+00:00")
                        )
                        timestamp = int(dt.timestamp() * 1000)
                    except (ValueError, AttributeError):
                        timestamp = int(datetime.now(timezone.utc).timestamp() * 1000)
                    
                    articles.append({
                        "title": post.get("title", ""),
                        "description": post.get("title", ""),  # CryptoPanic doesn't have description
                        "url": post.get("url", ""),
                        "source": post.get("source", {}).get("title", "CryptoPanic"),
                        "timestamp": timestamp
                    })
                
                logger.info(f"✅ CryptoPanic: Fetched {len(articles)} articles")
                return articles
        
        except Exception as e:
            logger.error(f"❌ CryptoPanic failed: {e}")
            raise
    
    async def _fetch_from_rss_feeds(self, limit: int = 20) -> List[Dict[str, Any]]:
        """Fetch REAL news from RSS feeds"""
        articles = []
        successful_sources = 0
        
        for source_name, feed_url in self.rss_feeds.items():
            try:
                # Fetch the feed with an explicit timeout, following redirects
                async with httpx.AsyncClient(timeout=self.timeout, follow_redirects=True) as client:
                    response = await client.get(feed_url)
                    response.raise_for_status()

                # Parse the downloaded XML with feedparser
                feed = feedparser.parse(response.text)
                
                if feed.bozo and feed.bozo_exception:
                    logger.warning(f"⚠️ RSS ({source_name}): Feed parsing warning: {feed.bozo_exception}")
                
                if not feed.entries:
                    logger.warning(f"⚠️ RSS ({source_name}): No entries found")
                    continue
                
                for entry in feed.entries[:limit]:
                    # Parse timestamp
                    try:
                        if hasattr(entry, "published_parsed") and entry.published_parsed:
                            dt = datetime(*entry.published_parsed[:6])
                        elif hasattr(entry, "updated_parsed") and entry.updated_parsed:
                            dt = datetime(*entry.updated_parsed[:6])
                        else:
                            dt = datetime.utcnow()
                        
                        timestamp = int(dt.timestamp() * 1000)
                    except Exception as ts_error:
                        logger.debug(f"Timestamp parsing failed for {source_name}: {ts_error}")
                        timestamp = int(datetime.utcnow().timestamp() * 1000)
                    
                    # Extract description
                    description = ""
                    if hasattr(entry, "summary"):
                        description = entry.summary[:300]
                    elif hasattr(entry, "description"):
                        description = entry.description[:300]
                    
                    articles.append({
                        "title": entry.get("title", "Untitled"),
                        "description": description,
                        "url": entry.get("link", ""),
                        "source": source_name.title(),
                        "timestamp": timestamp
                    })
                
                successful_sources += 1
                logger.info(
                    f"✅ RSS ({source_name}): Fetched {len(feed.entries)} articles"
                )
                
                if len(articles) >= limit:
                    break
            
            except httpx.HTTPError as e:
                logger.warning(f"⚠️ RSS feed {source_name} HTTP error: {e}")
                continue
            except Exception as e:
                logger.warning(f"⚠️ RSS feed {source_name} failed: {e}")
                continue
        
        if successful_sources > 0:
            logger.info(f"✅ Successfully fetched from {successful_sources}/{len(self.rss_feeds)} RSS sources")
        else:
            logger.error(f"❌ All RSS feeds failed")
        
        return articles[:limit]


# Global instance
crypto_news_client = CryptoNewsClient()


__all__ = ["CryptoNewsClient", "crypto_news_client"]
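

# Usage sketch (illustrative, not part of the module's API): fetch a few
# headlines from whichever sources are configured. Assumes network access;
# with no API keys set, only the RSS fallback is exercised.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        articles = await crypto_news_client.get_latest_news(limit=5)
        for article in articles:
            print(f"[{article['source']}] {article['title']} -> {article['url']}")

    asyncio.run(_demo())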