# coding=utf-8
from re import findall
import pandas as pd
import pytz
# date: 7/06/16
# created by: Greg Cohn
__authors__ = 'Greg Cohn'
__version__ = '1.0'
def _get_header_line(header, lineno, sep):
"""
Private function. Breaks header line into individual comma delimited parts and strips white space, and double
quotation marks.
Example::
['"#","Date","Time, GMT-08:00","Temp, \xc2\xb0C (LGR S/N: 920980, SEN S/N: 920980)","Intensity, Lux (LGR S/N:
920980, SEN S/N: 920980)"\n']
becomes
['#',
'Date',
'Time, GMT-08:00',
'Temp, \xc2\xb0C (LGR S/N: 920980, SEN S/N: 920980)',
'Intensity, Lux (LGR S/N: 920980, SEN S/N: 920980)']
:param header: array of header lines where each line is a single string.
:param lineno: int. Index of line number to be parsed
:return: list of header components from lineno.
"""
col_line = header[lineno]
if sep is ',' and col_line.count('"'):
sep = '","'
line = [s.strip('"') for s in col_line.strip().split(sep)]
return line
class HOBOdata:
    """
    Load and process data from HOBO_ loggers produced by the ONSET company.

    Handles csv files exported from the HoboWare program. The native format for HOBO loggers is a .hobo file. This
    proprietary binary file is not handled here and must be converted to a csv.

    This class syncs timesteps, checks time zones, and units, and converts where needed.

    .. _HOBO : http://www.onsetcomp.com/hobo-data-loggers
    """

    def __init__(self):
        """
        Create an empty container; call :meth:`load_csv_data` to fill it.
        """
        self.header = []            # raw header lines of the csv file, column title line last
        self.data = pd.DataFrame()  # records, indexed by timestamp once loaded
        self.filename = ''          # path of the csv file the header was read from
        self.col = []               # short column names derived from the header
        self.sep = ''               # delimiter detected in the header

    def read_csv_header(self, fname):
        """
        Read the header lines of a csv file into ``self.header`` and remember the file name.

        Lines are collected from the top of the file up to and including the first line that contains ``Date`` or
        ``Time``; that line is treated as the column title line (the same test :meth:`get_timestamp_col` uses to
        recognise timestamp columns). The file is decoded as UTF-8 (a byte order mark is dropped) and, if that
        fails, as Latin-1.

        :param fname: str. Filepath of csv data file
        :return: list of header lines, each still carrying its line ending
        :raises ValueError: if no line containing ``Date`` or ``Time`` is found
        """
        header = None
        for encoding in ('utf-8-sig', 'latin-1'):
            try:
                header = self._read_header_lines(fname, encoding)
            except UnicodeDecodeError:
                # Not valid UTF-8; Latin-1 accepts any byte sequence, so the next pass cannot fail this way.
                continue
            break
        self.header = header
        self.filename = fname
        return header

    @staticmethod
    def _read_header_lines(fname, encoding):
        """Return the lines of ``fname`` up to and including the first one naming a Date or Time column."""
        lines = []
        with open(fname, encoding=encoding) as f:
            for line in f:
                lines.append(line)
                if 'Date' in line or 'Time' in line:
                    return lines
        raise ValueError('No Date or Time column(s) found')

    def get_csv_sn(self, header, lineno=-1):
        """
        Get the logger serial number (the value following ``LGR S/N:``) from a header line.

        :param header: array of header lines where each line is a single string.
        :param lineno: keyword argument. index of header array. Function operates on specified index. Default -1
        :return: str containing serial number, without surrounding white space
        :raises IndexError: if the line holds no ``LGR S/N`` entry
        """
        # Capture only the logger serial; the sensor serial (SEN S/N) may follow inside the same parenthesis.
        return findall(r'LGR S/N:\s*([^,)\s]+)', header[lineno])[0]

    def get_csv_GMT_offset(self, header, lineno=-1):
        """
        Get timezone as an offset from Greenwhich Mean Time from the header file

        :param lineno: keyword argument. index of header array. Function operates on specified index. Default -1
        :param header: array of header lines where each line is a single string.
        :return: float. Timezone offset from GMT in hours; minutes are expressed as a fraction of an hour.
        :raises AttributeError: if the line carries no ``GMT`` offset

        Example::

            'GMT-08:00' gives -8.0 and 'GMT+05:30' gives 5.5
        """
        found = findall(r'GMT\s*([+-]?)(\d{1,2})(?::?(\d{2}))?', header[lineno])
        if not found:
            raise AttributeError('Required attribute: TIME ZONE not found in header!\nTo export time zone from '
                                 'HOBOware:\nGo to Preferences>>General>>Export Settings:\nDE-SELECT option, '
                                 '"No quotes or commas in headings, properties in parentheses"\n')
        sign, hours, minutes = found[0]
        offset = float(hours) + float(minutes or 0) / 60.
        # The sign applies to hours and minutes together.
        return -offset if sign == '-' else offset

    def get_csv_temp_unit(self, header, lineno=-1):
        """
        Get unit for temperature records

        :param header: array of header lines where each line is a single string.
        :param lineno: keyword argument. index of header array. Function operates on specified index. Default -1
        :return: str. The degree sign followed by the unit letter, taken from the last match on the line.
        :raises IndexError: if the line holds no degree sign
        """
        deg = findall('\xb0[^ ",]*', header[lineno])
        return deg[-1]

    def get_csv_intensity_unit(self, header, lineno=-1):
        """
        Get unit for sunlight intensity

        :param header: array of header lines where each line is a single string
        :param lineno: keyword argument. index of header array. Function operates on specified index. Default -1
        :return: list of str. Every intensity unit found on the line, in order; empty if there is none.
        """
        # The optional \\xc2 also accepts headers whose UTF-8 bytes were decoded as Latin-1.
        return findall(r'(?i)(Lux|lum/ft\xc2?\xb2)', header[lineno])

    def get_csv_col(self, header, sep, lineno=-1):
        r"""
        Extract column names from csv format.

        From multiple header lines, this extracts a single line, and strips extra info, leaving only column names.
        File delimiter is used to split header into columns; within a column, only the text before the first comma
        and before the first space is kept.

        Example::

            Single string header:
            ['"#","Date","Time, GMT-08:00","Temp, \xb0C (LGR S/N: 920980, SEN S/N: 920980)"\n']

            becomes a list of column strings:
            ['#', 'Date', 'Time', 'Temp']

        :param header: array of header lines where each line is a single string.
        :param sep: str. Delimiter used by the file.
        :param lineno: keyword argument. index of header array. Function operates on specified index. Default -1
        :return: array of column names.
        """
        titles = _get_header_line(header, lineno, sep)
        return [title.split(',')[0].split(' ')[0] for title in titles]

    def get_timestamp_col(self, col):
        """
        Time stamps can be exported by HOBO into either 1 or 2 columns

        :param col: an array of column names
        :return: tuple ``(index locations, column name)``. With one timestamp column the locations are ``[i]`` and
            the name is that column's name. With several, the locations are nested (``[[i, j]]``) and the name is
            the first two column names joined by an underscore.
        :raises ValueError: if no column name contains ``Date`` or ``Time``
        """
        timestamp_i = [i for i, name in enumerate(col) if 'Date' in name or 'Time' in name]
        if not timestamp_i:
            raise ValueError('No Date or Time column(s) found')
        timestamp_n = [col[i] for i in timestamp_i]
        if len(timestamp_i) > 1:
            # A nested list tells pandas.read_csv to combine the columns into a single timestamp.
            return [timestamp_i], timestamp_n[0] + '_' + timestamp_n[1]
        return timestamp_i, timestamp_n[0]

    def get_delimiter(self, header, lineno=-1):
        """
        Find the delimiter used in the csv file.

        AS of 3/9/21, the only possible delimiters when exporting from HOBOware are tab, semicolon and comma. This
        method tests for them in that order and returns the first one present in the line.

        :param header: array of header lines where each line is a single string.
        :param lineno: keyword argument. index of header array. Function operates on specified index. Default -1
        :return: str containing delimiter
        :raises KeyError: if none of the delimiters is present
        """
        header_col = header[lineno]
        # Comma is tested last because it also occurs inside column titles of ';' or tab delimited files.
        for d in (';', '\t', ','):
            if d in header_col:
                return d
        raise KeyError('Cannot find valid delimiter.\nHOBOware only exports ";" , "\\t" , ","')

    def load_csv_data(self, fname):
        """
        Load csv file output by HOBO pendants into a Pandas DataFrame.

        Reads the header, detects the delimiter and column names, then loads the records into ``self.data`` with
        the timestamp as index.

        NOTE: a timestamp split over two columns is combined through a nested ``parse_dates`` list, which pandas
        versions from 3.0 on no longer accept.

        :param fname: str. Filepath of csv data file
        """
        self.read_csv_header(fname)
        skip_nrows = len(self.header)
        self.sep = self.get_delimiter(self.header, lineno=-1)
        col = self.get_csv_col(self.header, self.sep)
        date_col_i, date_col_n = self.get_timestamp_col(col)
        self.data = pd.read_csv(fname, delimiter=self.sep, parse_dates=date_col_i, skiprows=skip_nrows, names=col,
                                index_col=date_col_n)
        self.col = col

    def export_to_GCE_csv(self, csvname, units, tz):
        """
        Export the HOBO data to a GCE_ friendly csv file

        The file starts with one provenance line, followed by the record number, ``Date`` and whichever of ``Temp``
        and ``Intensity`` were loaded. Rows missing a ``Temp`` or ``Intensity`` value are left out. ``units`` and
        ``tz`` are only written into the provenance line; no conversion is performed here.

        Restricting the exported columns matters for HOBO settings that split timestamps between columns and add
        event columns such as 'Coupler Attached', 'Stopped' or 'End Of File'.

        :param csvname: str. Filepath to output csv file
        :param units: str. Units of output data. Example: 'SI'.
        :param tz: float. GMT time zone of output data series. Example: -8.

        .. _GCE : https://gce-lter.marsci.uga.edu/public/im/tools/data_toolbox.htm
        """
        measured = [name for name in ('Temp', 'Intensity') if name in self.col]
        export_col = ['Date'] + measured

        # Work on a copy so the loaded data is left untouched.
        df = self.data.dropna(subset=measured).copy() if measured else self.data.copy()
        df['Date'] = df.index
        if '#' in df.columns:
            # record number present
            df = df.set_index('#', drop=True)
        else:
            # No record number present: number the records from 1
            df.index = pd.RangeIndex(start=1, stop=len(df) + 1, step=1)
        df.index = df.index.rename('RecNum')

        header_str = ('{fname} processed on {t_exp} UTC by {prog} v{prog_v}. Orig. record GMT {tz_orig}. '
                      'Output file: GMT {tz}, {units} units, {csvname}\n').format(
            fname=self.filename,
            t_exp=pd.Timestamp.now(tz='UTC').strftime('%Y-%m-%d %H:%M'),
            prog=__name__,
            prog_v=__version__,
            tz_orig=self.get_csv_GMT_offset(self.header),
            tz=tz,
            units=units,
            csvname=csvname)

        # Render to text first and normalise line endings ourselves: the to_csv keyword for the line
        # terminator was renamed between pandas releases.
        csv_text = df.to_csv(columns=export_col, date_format='%Y-%m-%d %H:%M', float_format='%g')
        with open(csvname, 'w') as f:
            f.write(header_str)
            f.write(csv_text.replace('\r\n', '\n'))

    def set_data_GMT_offset(self, hr_offset):
        """
        Define time zone of DataFrame timestamps in offset from UTC/GMT

        Timestamps without a time zone are labelled with the offset; timestamps that already carry a time zone are
        converted to it.

        :param hr_offset: floating point of time zone in hours difference from Greenwhich Mean Time
        """
        ts = self.data
        # pytz.FixedOffset takes a number of minutes; hand it an integer.
        gmt_offset = pytz.FixedOffset(int(round(hr_offset * 60)))
        if ts.index.tz is None:
            self.data = ts.tz_localize(gmt_offset)
        else:
            self.data = ts.tz_convert(gmt_offset)

    def is_timezone_correct(self, tz):
        """
        Check the timezone in which data was recorded against the expected timezone

        :param tz: a timezone as number of hours offset from Greenwhich Mean Time
        :return: Boolean
        """
        return tz == self.get_csv_GMT_offset(self.header)

    def is_temp_celsius(self):
        """
        Read units definition from header and return true if units are celsius

        :return: Boolean. True if temperature is recorded in celsius.
        """
        units = self.get_csv_temp_unit(self.header)
        return 'C' == units[-1]

    def temp_F_to_C(self, temp):
        """
        Convert temperature records from Fahrenheit

        :param temp: a temperature value, or an object supporting element-wise arithmetic (for example a pandas
            Series), in degrees fahrenheit.
        :return: the corresponding value(s) in degrees celsius
        """
        return (temp - 32) * 5. / 9.

    def is_intensity_lux(self):
        """
        Read units definition from header and return True if units are Lux

        :return: Boolean. True if light intensity is recorded in Lux
        :raises IndexError: if the header names no intensity unit
        """
        units = self.get_csv_intensity_unit(self.header)
        return 'lux' == units[0].lower()

    def intensity_lumft2_to_lux(self, intensity):
        """
        Convert light intensity records from lumen ft-2 into Lux

        :param intensity: an intensity value, or an object supporting element-wise arithmetic, in lumen ft-2
        :return: the corresponding value(s) in Lux
        """
        return intensity * 10.76391
if __name__ == "__main__":
    # TEST HOBO LOAD
    # Manual smoke test against files on the author's workstation. Raw strings keep the Windows
    # backslashes literal instead of relying on unrecognised escape sequences.
    test = HOBOdata()
    test.load_csv_data(r'E:\workspace\sensors\verify\hobo_tests\557_2013_150.csv')

    x = HOBOdata()
    x.load_csv_data(r'E:\workspace\sensors/verify\hobo_tests\RS12_2015_180_1___test.csv')
    # Label the records as GMT-08:00.
    x.set_data_GMT_offset(-8)
    # Bring temperatures to celsius when the logger recorded fahrenheit.
    if not x.is_temp_celsius():
        x.data['Temp'] = x.temp_F_to_C(x.data['Temp'])
    x.export_to_GCE_csv(r'E:\workspace\sensors/verify\hobo_tests\New_outtest.csv', units='SI', tz=-8)