Skriptet dhe dokumentat
This commit is contained in:
BIN
Excel/raw_OBS-GYN_2016-2019.xlsx
Normal file
BIN
Excel/raw_OBS-GYN_2016-2019.xlsx
Normal file
Binary file not shown.
BIN
Excel/raw_kirurgji_2016-2019.xlsx
Normal file
BIN
Excel/raw_kirurgji_2016-2019.xlsx
Normal file
Binary file not shown.
BIN
Excel/raw_urgjenca_2016-2019.xlsx
Normal file
BIN
Excel/raw_urgjenca_2016-2019.xlsx
Normal file
Binary file not shown.
BIN
Word/OBS-GYN_2016.docx
Normal file
BIN
Word/OBS-GYN_2016.docx
Normal file
Binary file not shown.
BIN
Word/OBS-GYN_2017.docx
Normal file
BIN
Word/OBS-GYN_2017.docx
Normal file
Binary file not shown.
BIN
Word/OBS-GYN_2018.docx
Normal file
BIN
Word/OBS-GYN_2018.docx
Normal file
Binary file not shown.
BIN
Word/OBS-GYN_2019.docx
Normal file
BIN
Word/OBS-GYN_2019.docx
Normal file
Binary file not shown.
BIN
Word/kirurgji_2016.docx
Normal file
BIN
Word/kirurgji_2016.docx
Normal file
Binary file not shown.
BIN
Word/kirurgji_2017.docx
Normal file
BIN
Word/kirurgji_2017.docx
Normal file
Binary file not shown.
BIN
Word/kirurgji_2018.docx
Normal file
BIN
Word/kirurgji_2018.docx
Normal file
Binary file not shown.
BIN
Word/kirurgji_2019.docx
Normal file
BIN
Word/kirurgji_2019.docx
Normal file
Binary file not shown.
BIN
Word/urgjenca_2016.docx
Normal file
BIN
Word/urgjenca_2016.docx
Normal file
Binary file not shown.
BIN
Word/urgjenca_2017.docx
Normal file
BIN
Word/urgjenca_2017.docx
Normal file
Binary file not shown.
BIN
Word/urgjenca_2018.docx
Normal file
BIN
Word/urgjenca_2018.docx
Normal file
Binary file not shown.
BIN
Word/urgjenca_2019.docx
Normal file
BIN
Word/urgjenca_2019.docx
Normal file
Binary file not shown.
49
convert.py
Normal file
49
convert.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from os import listdir
|
||||||
|
from os.path import isfile, join
|
||||||
|
from docx.api import Document
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class Klinika(Enum):
    """Clinics whose reports this script can process.

    Each member's value is the token that is matched against the Word file
    names and embedded in the name of the generated Excel workbook.
    """

    # Surgery clinic.
    KIRURGJI = 'kirurgji'
    # Obstetrics / gynaecology clinic.
    OBS = 'OBS-GYN'
    # Emergency department.
    URGJENCA = 'urgjenca'
|
||||||
|
|
||||||
|
|
||||||
|
def convert(klinika: str):
    """Merge the Word report tables of one clinic into a raw Excel workbook.

    Every regular file in ``./Word/`` whose name contains ``klinika`` (and
    does not contain ``"lock"``) is opened, and every table in it is read:
    the first row supplies column labels, the remaining rows supply data.
    The rows of all tables are stacked and written to
    ``./Excel/raw_<klinika>_2016-2019.xlsx``.

    Args:
        klinika: Clinic token to look for in the file names, e.g. one of the
            ``Klinika`` enum values.

    Raises:
        ValueError: If the number of distinct header labels differs from the
            width of the collected data rows.
    """
    print(klinika)
    docx_path = './Word/'
    xlsx_path = './Excel/'

    # Sorted so the row order of the output does not depend on the
    # arbitrary order in which the OS lists the directory.
    list_paths = [
        join(docx_path, name)
        for name in sorted(listdir(docx_path))
        if isfile(join(docx_path, name))
        and klinika in name
        and "lock" not in name  # presumably editor lock files
    ]

    columns = []
    rows = []
    for path in list_paths:
        document = Document(path)
        for table in document.tables:
            table_rows = list(table.rows)
            if not table_rows:
                # Nothing to read; avoids an IndexError on the header lookup.
                continue
            # Split header from data without touching the document XML.
            header, *body = table_rows
            columns.extend(
                cell.text.strip().replace('\n', ' ') for cell in header.cells
            )
            rows.extend([cell.text for cell in row.cells] for row in body)
        print(f'{path} Done')

    # Order-preserving de-duplication of the labels seen across all tables.
    filtered_columns = list(dict.fromkeys(columns))
    print(f'{len(filtered_columns)} Columns found')
    print('writing to excel...')

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copying the whole frame for every row is quadratic.
    df = pd.DataFrame(rows, columns=filtered_columns)
    excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
    df.to_excel(excel_path, index=False)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: build the raw workbook for the OBS-GYN clinic.
convert(Klinika.OBS.value)
|
||||||
122
llogarit_moshen.py
Normal file
122
llogarit_moshen.py
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
import calendar
|
||||||
|
from os.path import join
|
||||||
|
import time
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class Klinika(Enum):
    """Clinics whose raw workbook this script can process.

    Each member's value is the token embedded in the Excel file names that
    the script reads and writes.
    """

    # Surgery clinic.
    KIRURGJI = 'kirurgji'
    # Obstetrics / gynaecology clinic.
    OBS = 'OBS-GYN'
    # Emergency department.
    URGJENCA = 'urgjenca'
|
||||||
|
|
||||||
|
|
||||||
|
# Clinic whose raw workbook is processed by this run.
klinika = Klinika.KIRURGJI.value

xlsx_path = './Excel/'
# Must be an f-string: without the prefix the literal text "{klinika}" ends
# up in the file name and the workbook is never found.
excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")

df = pd.read_excel(excel_path, sheet_name='Sheet1')
# Adds the old index as column 0; the export step drops that column again.
df = df.reset_index()

# Result columns, sized from the frame itself instead of a hard-coded row
# count so that any clinic's workbook can be processed.
df['MOSHA'] = [None] * len(df)  # computed or declared age, or "ERROR"
df['ERROR'] = [None] * len(df)  # offending raw value when MOSHA is "ERROR"

# Positional row counter used by the processing loop.
i = 0
datat_pattern = "%d/%m/%y"
# Normalised admission dates / birth dates, one entry per processed row.
data_shtri_korigj = []
datelindja_korigj = []
|
||||||
|
|
||||||
|
|
||||||
|
def llogarit_moshen(datelindja: datetime, data_shtri: datetime):
    """Return the age in completed years on the admission date.

    Args:
        datelindja: Date of birth.
        data_shtri: Admission date at which the age is evaluated.

    Returns:
        The difference in calendar years, reduced by one when the admission
        date falls before the birthday within its year.
    """
    years = data_shtri.year - datelindja.year
    admitted_on = (data_shtri.month, data_shtri.day)
    birthday = (datelindja.month, datelindja.day)
    if admitted_on < birthday:
        # Birthday not yet reached in the admission year.
        years -= 1
    return years
|
||||||
|
|
||||||
|
|
||||||
|
def konverto_data(datat: str, i: int):
    """Interpret one cleaned date cell and, where possible, parse it.

    Side effect: for placeholder values and declared ages the result is
    written straight into row ``i`` of the module-level ``df`` (columns
    ``MOSHA`` and ``ERROR``).

    Args:
        datat: Cell text, already stripped of spaces, with ``.`` replaced by
            ``/`` (see ``pastro_string``).
        i: Positional row index in ``df`` that the cell belongs to.

    Returns:
        ``""`` when the cell is a known placeholder (row marked ``"ERROR"``),
        ``"DEKLARUAR"`` when the cell states an age in years ("vj") or
        months ("muaj") that was stored in ``MOSHA``,
        a ``datetime`` when the cell could be parsed as a date,
        ``None`` when the cell could not be interpreted (a message is
        printed).
    """
    datat_pattern = "%d/%m/%y"
    try:
        # Known placeholders / junk values: flag the row and keep the raw text.
        if "2572/81" in datat or datat in ("?", "nan", "Pensionist"):
            df.iat[i, df.columns.get_loc('MOSHA')] = "ERROR"
            df.iat[i, df.columns.get_loc('ERROR')] = datat
            return ""

        # Age given directly in years, e.g. "45vj" / "45vjec".
        if "vj" in datat:
            datat = datat.replace("vjec", "")
            datat = datat.replace("vj", "")
            df.iat[i, df.columns.get_loc('MOSHA')] = int(datat)
            return "DEKLARUAR"

        # Age given in months, stored as a fraction of a year.
        if "muaj" in datat:
            datat = datat.replace("muaj", "")
            datat = int(datat) / 12
            df.iat[i, df.columns.get_loc('MOSHA')] = float(datat)
            return "DEKLARUAR"

        datat = datat.split('/')
        # Only a year was given: assume 1 January.
        if len(datat) == 1:
            datat.insert(0, '1')
            datat.insert(1, '1')
        # Day "0" is not a valid day; fall back to the first of the month.
        if datat[0] == "0":
            datat[0] = "1"
        # Month/year only: assume the first day of the month.
        if len(datat) == 2:
            datat.insert(0, '1')

        if len(datat) > 2:
            # Pad a single-digit year so that it matches "%y".
            if datat[2] in ('1', '2', '3', '4', '5', '6', '7', '8', '9'):
                datat[2] = f"0{datat[2]}"
            year = int(datat[2])
            if year > 99:
                datat_pattern = "%d/%m/%Y"
            # Forced assumption: no birth date is 1919 or earlier, because the
            # latest admission dates are in 2019, so 20-99 means 1920-1999.
            elif year > 19:
                datat[2] = f"19{datat[2]}"
                datat_pattern = "%d/%m/%Y"
            else:
                datat_pattern = "%d/%m/%y"

        datat = "/".join(datat)
        return datetime.strptime(datat, datat_pattern)
    except (TypeError, ValueError) as e:
        # ValueError also covers int() on non-numeric text, which previously
        # escaped the handlers and aborted the whole run.
        print(f"{i}: {e} -- {datat}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def pastro_string(datat: str, i: int):
    """Normalise a raw date cell and hand it to ``konverto_data``.

    The value is converted to text, trimmed, stripped of spaces and
    apostrophes, and dots are turned into slashes.

    Args:
        datat: Raw cell value; converted with ``str()`` first.
        i: Positional row index forwarded to ``konverto_data``.

    Returns:
        Whatever ``konverto_data`` returns for the cleaned text.
    """
    cleaned = str(datat).strip()
    # Applied in this order: drop spaces, "." -> "/", drop apostrophes.
    for old, new in ((" ", ""), (".", "/"), ("'", "")):
        cleaned = cleaned.replace(old, new)
    return konverto_data(cleaned, i)
|
||||||
|
|
||||||
|
|
||||||
|
# Column positions are loop-invariant, so look them up once.
mosha_col = df.columns.get_loc('MOSHA')
error_col = df.columns.get_loc('ERROR')

# Compute the age for every row and collect the normalised dates.
for i, (_, row) in enumerate(df.iterrows()):
    emer = row['EMER']
    data_shtri = pastro_string(row['DATA E SHTRIMIT'], i)
    datelindja = pastro_string(row['DATELINDJA'], i)

    if data_shtri is None or datelindja is None:
        # Unparseable cell: report it and flag the row, but keep going.
        # Stopping here left the corrected-date lists shorter than the
        # frame, which made the df.insert calls below fail.
        print(f"{i}: {data_shtri} --{emer}-- {datelindja}")
        if not isinstance(datelindja, str):
            # A str result means konverto_data already filled MOSHA for
            # this row (declared age or placeholder); do not overwrite it.
            df.iat[i, mosha_col] = "ERROR"
            df.iat[i, error_col] = (
                f"{row['DATA E SHTRIMIT']} / {row['DATELINDJA']}"
            )
    elif isinstance(data_shtri, datetime) and isinstance(datelindja, datetime):
        mosha = llogarit_moshen(datelindja, data_shtri)
        if mosha == 0:
            # An age of 0 is flagged for manual review rather than stored.
            df.iat[i, mosha_col] = "ERROR"
            df.iat[i, error_col] = mosha
        else:
            df.iat[i, mosha_col] = mosha

    data_shtri_korigj.append(data_shtri)
    datelindja_korigj.append(datelindja)

# NOTE(review): fixed positions 16/17 assume the workbook layout; confirm.
df.insert(16, "DT_SHTRIMI_KORIGJ", data_shtri_korigj)
df.insert(17, "DATELINDJA_KORIGJ", datelindja_korigj)
# Drop the first column (the one added by reset_index() during setup).
df.drop(columns=df.columns[0], inplace=True)

# Unix timestamp (UTC) keeps successive exports from overwriting each other.
current_GMT = time.gmtime()
ts = calendar.timegm(current_GMT)

# Must be an f-string: without the prefix every run wrote to the literal
# file name "{klinika}_{ts}.xlsx".
excel_path = join(xlsx_path, f"{klinika}_{ts}.xlsx")
df.to_excel(excel_path, index=False)
|
||||||
Reference in New Issue
Block a user