#************************************
# Single-pass cleanup applied to every chunk: line breaks become spaces and
# square brackets are dropped.
_CHUNK_CLEANUP_TABLE = str.maketrans({"\n": " ", "\r": " ", "[": None, "]": None})


@app.route('/api/split-file', methods=['POST'])
def split_from_file():
    """Split an uploaded UTF-8 text file into overlapping chunks, returned as NDJSON.

    Request:
        * multipart part ``file``: the text file to split (required).
        * header ``category``: copied into every output record
          (defaults to ``"uncategorized"``).
        * form field ``environment``: stored in the intermediate document's
          metadata (defaults to ``"default_env"``); it is not part of the
          emitted records.

    Returns:
        * 200 with one JSON object per line, each shaped
          ``{"_id": "rec<N>", "text": ..., "category": ...}`` with N starting
          at 1.
        * 400 with ``{"error": "File mancante"}`` when no file was uploaded.
        * 500 with ``{"error": <message>}`` on any other failure (for example
          a file that is not valid UTF-8).
    """
    try:
        upload = request.files.get('file')
        # A form submitted without selecting a file still carries a 'file'
        # part with an empty filename; treat that as a missing file too.
        if upload is None or not upload.filename:
            return jsonify({"error": "File mancante"}), 400

        category = request.headers.get('category', 'uncategorized')
        environment = request.form.get('environment', 'default_env')

        # Decode the upload in memory instead of round-tripping through
        # /tmp/<client filename>: that path collided between concurrent
        # requests and was left behind whenever reading raised.
        input_text = upload.read().decode("utf-8")
        # Reproduce the newline translation a text-mode read would perform so
        # the splitter's "\n\n" / "\n" separators also match CRLF/CR input.
        input_text = input_text.replace("\r\n", "\n").replace("\r", "\n")
        input_text = normalize_text(input_text)

        doc = Document(page_content=input_text, metadata={"environment": environment})

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", ".", " ", ""],
        )
        chunks = splitter.split_documents([doc])

        records = [
            {
                "_id": f"rec{index}",
                "text": chunk.page_content.translate(_CHUNK_CLEANUP_TABLE).strip(),
                "category": category,
            }
            for index, chunk in enumerate(chunks, start=1)
        ]

        # NDJSON: one JSON document per line, non-ASCII characters kept as-is.
        ndjson_output = '\n'.join(
            json.dumps(record, ensure_ascii=False) for record in records
        )
        return app.response_class(ndjson_output, content_type='application/json')

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({"error": str(e)}), 500
#***************************************