Skip to content

Commit c7135a6

Browse files
authored
Merge pull request #33 from supermartzin/feature/portals/bezrealitky.cz
Bezrealitky.cz Ad portal revamped and ready to be used!
2 parents 3c74e22 + daedd52 commit c7135a6

File tree

2 files changed

+51
-28
lines changed

2 files changed

+51
-28
lines changed

Portals/RealEstatesWatcher.AdsPortals.BezrealitkyCz/BezrealitkyCzAdsPortal.cs

Lines changed: 49 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.Text.RegularExpressions;
2+
using System.Web;
23
using HtmlAgilityPack;
34
using Microsoft.Extensions.Logging;
45

@@ -8,40 +9,52 @@
89

910
namespace RealEstatesWatcher.AdsPortals.BezrealitkyCz;
1011

11-
public class BezrealitkyCzAdsPortal(string watchedUrl,
12-
IWebScraper webScraper,
13-
ILogger<BezrealitkyCzAdsPortal>? logger = null) : RealEstateAdsPortalBase(watchedUrl, webScraper, logger)
12+
public partial class BezrealitkyCzAdsPortal(string watchedUrl,
13+
IWebScraper webScraper,
14+
ILogger<BezrealitkyCzAdsPortal>? logger = null) : RealEstateAdsPortalBase(watchedUrl, webScraper, logger)
1415
{
16+
/// <summary>
/// Source-generated regex that captures, non-greedily, the text between
/// <c>url=</c> and the next <c>&amp;</c>; capture group 1 holds that text.
/// </summary>
[GeneratedRegex("url=(.+?)&")]
17+
private static partial Regex ImageUrlRegex();
18+
1519
/// <summary>Name of this ads portal; also assigned to <c>AdsPortalName</c> of every parsed post.</summary>
public override string Name => "Bezrealitky.cz";
1620

17-
protected override string GetPathToAdsElements() => "//article[contains(@class,\"product\")]";
21+
/// <summary>
/// XPath of the ad elements: the <c>article</c> children of the last
/// <c>section</c> in the document whose <c>class</c> attribute contains "box".
/// </summary>
protected override string GetPathToAdsElements() => "(//section[contains(@class,'box')])[last()]/article";
1822

1923
/// <summary>
/// Builds a <see cref="RealEstateAdPost"/> from a single ad element.
/// </summary>
/// <param name="node">HTML node of one ad, as selected by <c>GetPathToAdsElements</c>.</param>
/// <returns>
/// A post whose fields are filled by the dedicated <c>Parse*</c> helpers;
/// the currency is always <c>Currency.CZK</c>.
/// </returns>
protected override RealEstateAdPost ParseRealEstateAdPost(HtmlNode node)
{
    // Helpers are invoked in the same order as the properties are listed.
    var adPost = new RealEstateAdPost
    {
        AdsPortalName = Name,
        Title = ParseTitle(node),
        Text = ParseText(node),
        Price = ParsePrice(node),
        Currency = Currency.CZK,
        Layout = ParseLayout(node),
        Address = ParseAddress(node),
        WebUrl = ParseWebUrl(node),
        AdditionalFees = ParseAdditionalFees(node),
        FloorArea = ParseFloorArea(node),
        ImageUrl = ParseImageUrl(node)
    };

    return adPost;
}
3337

34-
private static string ParseTitle(HtmlNode node) => node.SelectSingleNode(".//p[@class=\"product__note\"]").InnerText;
38+
/// <summary>
/// Composes the ad title from the text of the property card's label span
/// followed by the text of its address span, separated by a single space.
/// </summary>
/// <param name="node">HTML node of one ad.</param>
/// <returns>The string "&lt;label&gt; &lt;address&gt;".</returns>
private static string ParseTitle(HtmlNode node) =>
    string.Join(
        " ",
        node.SelectSingleNode(".//span[contains(@class,'propertyCardLabel')]").InnerText,
        node.SelectSingleNode(".//span[contains(@class,'propertyCardAddress')]").InnerText);
45+
46+
/// <summary>
/// Reads the ad text from the first paragraph found inside the element
/// whose class contains "propertyCardContent".
/// </summary>
/// <param name="node">HTML node of one ad.</param>
/// <returns>Inner text of that paragraph.</returns>
private static string ParseText(HtmlNode node)
{
    var paragraph = node.SelectSingleNode(".//div[contains(@class,'propertyCardContent')]//p");

    return paragraph.InnerText;
}
47+
48+
/// <summary>
/// Reads the address from the span whose class contains "propertyCardAddress".
/// </summary>
/// <param name="node">HTML node of one ad.</param>
/// <returns>Inner text of the address span.</returns>
private static string ParseAddress(HtmlNode node)
{
    var addressSpan = node.SelectSingleNode(".//span[contains(@class,'propertyCardAddress')]");

    return addressSpan.InnerText;
}
49+
50+
/// <summary>
/// Reads the ad's URL from the <c>href</c> attribute of the link inside the
/// headline (<c>h2</c> whose class contains "propertyCardHeadline").
/// </summary>
/// <param name="node">HTML node of one ad.</param>
/// <returns>A <see cref="Uri"/> built from the attribute value (empty string when the attribute is absent).</returns>
private static Uri ParseWebUrl(HtmlNode node)
{
    var headlineLink = node.SelectSingleNode(".//h2[contains(@class,'propertyCardHeadline')]//a");
    var href = headlineLink.GetAttributeValue("href", string.Empty);

    return new Uri(href);
}
3551

3652
private static decimal ParsePrice(HtmlNode node)
3753
{
38-
var value = node.SelectSingleNode(".//strong[@class=\"product__value\"]")?.InnerText;
39-
if (value == null)
54+
var value = node.SelectSingleNode(".//span[contains(@class,'propertyPriceAmount')]")?.InnerText;
55+
if (value is null)
4056
return decimal.Zero;
4157

42-
if (value.Contains('+'))
43-
value = value.Split('+')[0]; // get first value as primary
44-
4558
value = RegexMatchers.AllNonNumberValues().Replace(value, string.Empty);
4659

4760
return decimal.TryParse(value, out var price)
@@ -70,7 +83,11 @@ private static decimal ParseAdditionalFees(HtmlNode node)
7083

7184
private static Layout ParseLayout(HtmlNode node)
7285
{
73-
var value = node.SelectSingleNode(".//p[@class=\"product__note\"]").InnerText;
86+
// HtmlAgilityPack's SelectNodes yields null (not an empty collection) when nothing
// matches, so the pattern below rejects both a missing list and an unexpected item count.
var values = node.SelectNodes(".//li[contains(@class,'featuresListItem')]");
if (values is not { Count: 2 })
    return default; // previously `decimal.Zero`, which is not convertible to Layout.
                    // NOTE(review): swap for the enum's explicit "unspecified" member if it is not the zero value.

// The first feature item is the one matched against the layout pattern; HTML entities are decoded first.
var value = HttpUtility.HtmlDecode(values[0].InnerText);
7491

7592
var result = RegexMatchers.Layout().Match(value);
7693
if (!result.Success)
@@ -82,13 +99,13 @@ private static Layout ParseLayout(HtmlNode node)
8299
return LayoutExtensions.ToLayout(layoutValue);
83100
}
84101

85-
private static string ParseAddress(HtmlNode node) => node.SelectSingleNode(".//a[contains(@class,\"product__link\")]/strong").InnerText;
86-
87-
private static Uri ParseWebUrl(HtmlNode node) => new(node.SelectSingleNode(".//a[contains(@class,\"product__link\")]").GetAttributeValue("href", string.Empty));
88-
89102
private static decimal ParseFloorArea(HtmlNode node)
90103
{
91-
var value = ParseTitle(node);
104+
// HtmlAgilityPack's SelectNodes yields null (not an empty collection) when nothing
// matches, so the pattern below rejects both a missing list and an unexpected item count.
var values = node.SelectNodes(".//li[contains(@class,'featuresListItem')]");
if (values is not { Count: 2 })
    return decimal.Zero;

// The last feature item is the one matched against the floor-area pattern; HTML entities are decoded first.
var value = HttpUtility.HtmlDecode(values[^1].InnerText);
92109

93110
var result = RegexMatchers.FloorArea().Match(value);
94111
if (!result.Success)
@@ -101,14 +118,20 @@ private static decimal ParseFloorArea(HtmlNode node)
101118
: decimal.Zero;
102119
}
103120

104-
private static Uri? ParseImageUrl(HtmlNode node, string hostUrlPart)
121+
/// <summary>
/// Extracts the ad's image URL from the <c>srcset</c> attribute of the first
/// <c>img</c> found inside a span whose class contains "image".
/// </summary>
/// <param name="node">HTML node of one ad.</param>
/// <returns>
/// The absolute image URL, or <c>null</c> when the image element or attribute is
/// missing, the URL-decoded <c>srcset</c> contains no <c>url=…&amp;</c> segment,
/// or the captured value is not a valid absolute URI.
/// </returns>
private static Uri? ParseImageUrl(HtmlNode node)
{
    var srcSet = node.SelectSingleNode(".//span[contains(@class,'image')]//img")?
                     .GetAttributeValue("srcset", null);
    if (srcSet is null)
        return null;

    // The attribute value is URL-decoded before the `url=` parameter is captured.
    var match = ImageUrlRegex().Match(HttpUtility.UrlDecode(srcSet));
    if (!match.Success)
        return null;

    // The image is optional: a malformed or relative value must not throw and
    // abort parsing of the whole ad, so fall back to null instead.
    return Uri.TryCreate(match.Groups[1].Value, UriKind.Absolute, out var imageUrl)
        ? imageUrl
        : null;
}
114137
}

RealEstatesWatcher.UI.Console/RealEstatesWatcher.UI.Console.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
<LangVersion>latest</LangVersion>
99
<Nullable>enable</Nullable>
1010
<ImplicitUsings>enable</ImplicitUsings>
11-
<Version>1.4.1</Version>
12-
<AssemblyVersion>1.4.1.0</AssemblyVersion>
11+
<Version>1.4.2</Version>
12+
<AssemblyVersion>1.4.2.0</AssemblyVersion>
1313
<Authors>Martin Vrábel</Authors>
1414
<Product>Real Estates Watcher</Product>
1515
<Description>Simple C# script for periodic watching of selected Real estate advertisement portals with notifications on new ads.</Description>

0 commit comments

Comments
 (0)