Shume shume keq data input. Done!

This commit is contained in:
2022-09-12 16:37:18 +02:00
parent b7c71eee3d
commit 991170c510
8 changed files with 33 additions and 13 deletions

View File

@@ -3,6 +3,7 @@ from os import listdir
from os.path import isfile, join
from docx.api import Document
from enum import Enum
from datetime import datetime
class Klinika(Enum):
@@ -12,7 +13,7 @@ class Klinika(Enum):
def convert(klinika: str):
print(klinika)
print(f"Filloi {klinika} me {datetime.now()}")
columns = []
filtered_columns = []
docx_path = './Word/'
@@ -26,6 +27,14 @@ def convert(klinika: str):
for path in list_paths:
document = Document(path)
for table in document.tables:
if "urgjenca_2016.docx" in path:
grid = table._tbl.find("w:tblGrid", table._tbl.nsmap)
for cell in table.column_cells(2):
cell._tc.getparent().remove(cell._tc)
col_elem = grid[2]
grid.remove(col_elem)
print(f"U FSHI PAVIONI")
for cell in table.rows[0].cells:
columns.append(cell.text.strip().replace('\n', ' '))
RowA = table.rows[0]
@@ -34,16 +43,17 @@ def convert(klinika: str):
for row in table.rows:
text = [cell.text for cell in row.cells]
df = df.append([text], ignore_index=True)
print(f'{path} Done')
print(f"Mbaroi {path} me {datetime.now()}")
for word in columns:
if word not in filtered_columns:
filtered_columns.append(word)
print(f'{len(filtered_columns)} Columns found')
print('writing to excel...')
print(f"Perfundoi {klinika} me {datetime.now()}")
df.columns = filtered_columns
excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
df.to_excel(excel_path, index=False)
convert(Klinika.OBS.value)
convert(Klinika.URGJENCA.value)