From 6ba1efa883e2e680ff63cccb9ada5eb737924a2c Mon Sep 17 00:00:00 2001
From: Michael Van Ryn <mvanryn@mpe.ca>
Date: Thu, 15 Jun 2023 09:54:37 -0600
Subject: [PATCH] Some README documentation and code documentation

---
 README.md |  57 +++++++++++++++++++++-
 main.py   | 141 +++++++++++++++++++++++++++++-------------------------
 2 files changed, 132 insertions(+), 66 deletions(-)

diff --git a/README.md b/README.md
index f1ed71c..b07f7a2 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,58 @@
 # VTScada-HistoricalTools
 
-Key Point: In the tags list file, the Source Name field is the unique identifier for the tag name to query. In VTScada this can be something like ```temp\old_value1```. In ClearSCADA, it will be the unique point ID, ex. ```005152```. The leading zeroes can be left out as the script will pad them in front of the integere to determine the correct path.
+This is a set of tooling to perform useful operations for querying Historical data and moving it into ClearSCADA
+
+Currently supports querying data from the following sources, and their requirements
+* VTScada - REST
+* ClearSCADA - Raw Historic Files
+* Wonderware / AVEVA Historian - InSQL {Coming Soon}
+
+The primary function of this tooling is to query a set of tags for a specified date range, compress and process those values as required, and move it into a format which can be easily imported into VTScada.
+
+## Setup
+
+### TagMap file
+
+The TagMap file is a CSV file of which tags to query on the existing system. They are queried with the following parameters.
+* Interval - time in seconds to query the data point
+* Deadband - values within this deadband before scaling will be eliminated
+* Scale Factor - scaling factor to apply to existing data before compression
+* Precision - the final result value will be rounded to this many decimal places
+
+For all tags, values will either be taken from a set of all recorded data (for example, ClearSCADA), or queried at the required interval (for example, VTScada REST). For the former, values within the interval will be deleted after compression.
+
+For boolean values, they will be **sampled** at the interval specified and compressed by eliminating values which stay the same. To ensure the precision of transitions wanted is kept, a high precision such as 1 second sampling is recommended here
+
+For integer/analog values, they will be **averaged** at the interval specified, and compressed by eliminating values within the deadband, then scaled, then rounded to the required precision. A lower precision of sampling is recommended here, perhaps 5 seconds for high-value points, and 30-60 seconds for low value points.
+
+### Setup TOML
+
+This file contains configuration on how to connect to the required existing interfaces and locations of input and the output data.
+
+### Python
+
+This system requires Python to be installed and run on a system which can target the existing interfaces. Several libraries will need to be installed with pip.
+
+## VTScada REST
+
+This is a method of moving VTScada data into VTScada data. Scenarios where this could be useful:
+* Moving tag data from Analog Status / Digital Status to the modern IO tags
+* Tag data sizes which have gotten out of hand
+* A site has been recommissioned with a new set of tags and data needs to be imported
+
+## ClearSCADA - Raw Historic Files
+
+In places where targeting a live ClearSCADA system with SQL queries is challenging, ClearSCADA uses a file-based Historian and provides a utility which converts these HRD files into CSV data.
+
+For each week of each data point, a separate CSV file of data is created.
+
+Files are generally stored:
+```C:\ProgramData\Schneider Electric\ClearSCADA\Database\HisFiles```
+
+Each directory contains a Historic XXXXXX directory, where XXXXXX is the Unique ID of the datapoint padded with leading zeroes, holding files named WKYYYYYY, where YYYYYY is the number of weeks since January 1, 1601 (yes, really).
+
+These tools will convert the user start time and end time in a way that will only process the found and required HRD files at a time. This can *greatly* expand the amount of data in the system, so it is strongly recommended to have a lot of free space left during queries.
+
+### Setup ClearSCADA Config 
+
+Key Point: In the tags list file, the Source Name field is the unique identifier for the tag name to query. In VTScada this can be something like ```temp\old_value1```. In ClearSCADA, it will be the unique point ID, ex. ```005152```. The leading zeroes can be left out as the script will pad them in front of the integer to determine the correct path.
diff --git a/main.py b/main.py
index b881ed3..d0f4bbb 100644
--- a/main.py
+++ b/main.py
@@ -42,7 +42,7 @@ class HistoricalTag:
         return f"({self.row}, {self.tag_type}, {self.name_source}, {self.name_dest}, {self.scale_factor}, {self.interval}, {self.precision}, {self.deadband})"
 
 # ----------------------
-#  Functions
+#  ClearSCADA Functions
 # ----------------------
 
 # clearscada_generate_historical_ids()
@@ -156,6 +156,79 @@ def clearscada_read_file(file_path: str) -> List[Union[int, float, None]]:
     return values
 
 
+# ----------------------
+#  VTScada Functions
+# ----------------------
+
+# vtscada_tag_query()
+# ----------------------
+# Given a HistoricalTag structure, query the tag's values from the start time to the end time
+
+
+def vtscada_tag_query(historical_tag: HistoricalTag, ft_start_time: datetime, ft_end_time: datetime) -> List[Union[int, float, None]]:
+    # Query average only for real values (Analog in VTScada)
+    if historical_tag.tag_type == "real":
+        value_string = ":Value:Average"
+    # Otherwise, query the value at the start of the interval
+    else:
+        value_string = ":Value:ValueAtStart"
+
+    query = "SELECT Timestamp, '" + historical_tag.name_source + value_string + "' FROM History_" + \
+        str(historical_tag.interval) + "s" + " WHERE Timestamp BETWEEN " + \
+        ft_start_time + " AND " + ft_end_time
+
+    url = "http://" + server + ":" + realm_port + \
+        "/" + realm_name + "/REST/SQLQuery?query=" + query
+
+    # print_text(url)
+
+    response = requests.get(url, auth=(application_user, application_pass))
+    returned = response.json()
+
+    return returned['results']['values']
+
+# vtscada_query()
+# ----------------------
+# Given the set of HistoricalTags and a start and end time, query the data of those tags from the
+# REST interface
+
+
+def vtscada_query(historical_tags: List[HistoricalTag], start_time: datetime, end_time: datetime):
+    current_start_time = start_time
+    current_end_time = start_time + timedelta(days=1)
+
+    while current_start_time < end_time:
+        print("Querying data for: " + str(current_start_time.year) + " " +
+              str(current_start_time.month) + " " + str(current_start_time.day))
+        dir_path = output_path + str(start_time.year) + "\\"
+        create_directory(dir_path)
+
+        ft_start_time = "'" + \
+            str(current_start_time.astimezone(timezone.utc)) + "'"
+        ft_end_time = "'" + \
+            str(current_end_time.astimezone(timezone.utc)) + "'"
+
+        tag_mappings = []
+
+        for tag in historical_tags:
+            values = vtscada_tag_query(tag, ft_start_time, ft_end_time)
+            output_file = prepare_file_for_tag(
+                tag, values, dir_path, current_end_time)
+
+            if output_file != "":
+                tag_mappings.append((output_file, tag.name_dest))
+
+        write_tagmapping_to_file(
+            dir_path + "TagMapping.csv", tag_mappings)
+
+        current_start_time += timedelta(days=1)
+        current_end_time += timedelta(days=1)
+
+# ----------------------
+#  Common Functions
+# ----------------------
+
+
 # compress_and_scale_real()
 # ----------------------
 # -- Deadband (only keeping values which change by the required amount)
@@ -236,6 +309,8 @@ def postprocess_values(values: List[Union[int, float, None]]):
 
 # prepare_file_for_tag()
 # ----------------------
+# Helper function to call the correct compressing and processing functions for a given tag and getting it written to
+# file
 
 
 def prepare_file_for_tag(tag: HistoricalTag, values: List[Union[int, float, None]], dir_path: str, current_end_time: datetime, append=False) -> str:
@@ -290,70 +365,6 @@ def read_tags(file_path: str) -> List[HistoricalTag]:
 
     return historical_tags
 
-# vtscada_tag_query()
-# ----------------------
-# Given a HistoricalTag structure, query the tag's values from the start time to the end time
-
-
-def vtscada_tag_query(historical_tag: HistoricalTag, ft_start_time: datetime, ft_end_time: datetime) -> List[Union[int, float, None]]:
-    # Query average only for real values (Analog in VTScada)
-    if historical_tag.tag_type == "real":
-        value_string = ":Value:Average"
-    # Otherwise, query the value at the start of the interval
-    else:
-        value_string = ":Value:ValueAtStart"
-
-    query = "SELECT Timestamp, '" + historical_tag.name_source + value_string + "' FROM History_" + \
-        str(historical_tag.interval) + "s" + " WHERE Timestamp BETWEEN " + \
-        ft_start_time + " AND " + ft_end_time
-
-    url = "http://" + server + ":" + realm_port + \
-        "/" + realm_name + "/REST/SQLQuery?query=" + query
-
-    # print_text(url)
-
-    response = requests.get(url, auth=(application_user, application_pass))
-    returned = response.json()
-
-    return returned['results']['values']
-
-# vtscada_query()
-# ----------------------
-# Given the set of HistoricalTags and a start and end time, query the data of those tags from the
-# REST interface
-
-
-def vtscada_query(historical_tags: List[HistoricalTag], start_time: datetime, end_time: datetime):
-    current_start_time = start_time
-    current_end_time = start_time + timedelta(days=1)
-
-    while current_start_time < end_time:
-        print("Querying data for: " + str(current_start_time.year) + " " +
-              str(current_start_time.month) + " " + str(current_start_time.day))
-        dir_path = output_path + str(start_time.year) + "\\"
-        create_directory(dir_path)
-
-        ft_start_time = "'" + \
-            str(current_start_time.astimezone(timezone.utc)) + "'"
-        ft_end_time = "'" + \
-            str(current_end_time.astimezone(timezone.utc)) + "'"
-
-        tag_mappings = []
-
-        for tag in historical_tags:
-            values = vtscada_tag_query(tag, ft_start_time, ft_end_time)
-            output_file = prepare_file_for_tag(
-                tag, values, dir_path, current_end_time)
-
-            if output_file != "":
-                tag_mappings.append((output_file, tag.name_dest))
-
-        write_tagmapping_to_file(
-            dir_path + "TagMapping.csv", tag_mappings)
-
-        current_start_time += timedelta(days=1)
-        current_end_time += timedelta(days=1)
-
 
 # write_tagmappings_to_file()
 # ----------------------