Create Excel Scatter

Source Code

create_excel_scatter.py
  1"""Create an excel file with a wave scatter table.
  2
  3This example shows how to create a color-coded Excel scatter table visualizing the joint distribution 
  4of significant wave height (Hs) and peak period (Tp) from metocean data.
  5
  6Features:
  7    Data Retrieval:
  8        Uses metocean_api to get wave data for a specific location and time period
  9    Statistical Analysis:
 10        Calculates the joint frequency distribution (scatter table) using metocean_stats
 11    Visualization:
 12        Creates an Excel file with color-coded cells representing occurrence probabilities
 13
 14Requirements:
 15    - openpyxl for Excel file creation
 16    - pandas for data handling
 17    - metocean_api and metocean_stats for data retrieval and analysis
 18
 19Example:
 20    Basic usage of this script:
 21    
 22    >>> python create_excel_scatter.py
 23
 24Note:
 25    Requires pandas and openpyxl to be installed:
 26    
 27    >>> pip install pandas openpyxl
 28"""
 29from pathlib import Path
 30import pandas as pd
 31import openpyxl
 32from openpyxl.styles import PatternFill
 33from openpyxl.worksheet.worksheet import Worksheet
 34from metocean_api import ts
 35from metocean_stats.stats.general import calculate_scatter
 36
 37
 38
 39class ScatterExcelWriter:
 40    """Write a scatter table to an excel file"""
 41
 42    def __init__(self, scatter: pd.DataFrame, row_name: str, column_name: str):
 43        self.scatter = scatter
 44        self.row_name = row_name
 45        self.column_name = column_name
 46        self.workbook = openpyxl.Workbook()
 47        self.sheet: Worksheet = self.workbook.active
 48
 49    def __get_color(self, occurence: int, total: int) -> str:
 50        """Get a color for the cell based on the occurence and the total number of occurences"""
 51        if occurence == 0:
 52            return None
 53
 54        prob = occurence / total
 55        # scale the probability to shift it towards the red end of the spectrum to exagerate the small values
 56        value = min(1.0, 5 * prob)
 57        red = int(255 * value)
 58        green = int(255 * (1 - value))
 59        blue = 0
 60        chex = [f"{i:02x}" for i in [red, green, blue]]
 61        return "".join(chex)
 62
 63    def write_occurences(self):
 64        """Write the occurences to the excel file"""
 65        upper_row = self.scatter.index
 66        upper_column = self.scatter.columns
 67        occurences = self.scatter.values
 68        header = (
 69            [f"{self.row_name}/{self.column_name}"] + upper_column.tolist() + ["Sum"]
 70        )
 71        self.sheet.append(header)
 72        total_sum = occurences.sum()
 73        for i, occ in enumerate(occurences):
 74            row = [upper_row[i]] + occ.tolist() + [occ.sum()]
 75            self.sheet.append(row)
 76            for j, cell in enumerate(occ):
 77                color = self.__get_color(cell, total_sum)
 78                if color:
 79                    self.sheet.cell(i + 2, j + 2).fill = PatternFill(
 80                        "solid", start_color=color
 81                    )
 82
 83        footer = ["Sum"] + occurences.sum(axis=0).tolist() + [total_sum]
 84        self.sheet.append(footer)
 85
 86    def append(self, row):
 87        """Append a row to the excel file"""
 88        self.sheet.append(row)
 89
 90    def save(self, filename):
 91        """Save the excel file"""
 92        self.workbook.save(filename)
 93
 94
 95def write_scatter():
 96    """Write a scatter table to an excel file"""
 97    lat_pos = 64.1154
 98    lon_pos = 7.8877
 99    start_date = "2020-10-21"
100    end_date = "2020-11-21"
101
102    product = "NORA3_wave_sub"
103    var1_name = "hs"
104    var2_name = "tp"
105
106    requested_values = [var1_name, var2_name]
107
108    df_ts = ts.TimeSeries(
109        lon=lon_pos,
110        lat=lat_pos,
111        start_time=start_date,
112        end_time=end_date,
113        variable=requested_values,
114        product=product,
115    )
116
117    df_ts.import_data(save_csv=True, save_nc=False, use_cache=True)
118
119    block_size = 1.0
120
121    scatter: pd.DataFrame = calculate_scatter(df_ts.data, var1_name, block_size, var2_name, block_size)
122
123    writer = ScatterExcelWriter(scatter, var1_name, var2_name)
124    writer.write_occurences()
125    writer.append([])
126
127    output_dir = Path("./output/simamet")
128    output_dir.mkdir(exist_ok=True, parents=True)
129
130    # Save the Excel file
131    path = output_dir / f"scatter_{product}-{start_date}-{end_date}.xlsx"
132    writer.save(path)
133
134
135if __name__ == "__main__":
136    write_scatter()